{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8b4ab64a-c1b6-4247-bb4b-9c72ebf63778",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T07:23:58.235873Z",
     "start_time": "2025-04-28T07:23:58.177330Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np     #只需要下载numpy库即可\n",
    "import random\n",
    "import GridWorld_v2\n",
    "import time\n",
    "from IPython.display import clear_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ad418bf1-1806-4c6d-830a-498b9c371405",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T07:23:58.239338Z",
     "start_time": "2025-04-28T07:23:58.235873Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "⬜️⬜️⬜️⬜️⬜️\n",
      "⬜️🚫🚫⬜️⬜️\n",
      "⬜️⬜️🚫⬜️⬜️\n",
      "⬜️🚫✅🚫⬜️\n",
      "⬜️🚫⬜️⬜️⬜️\n"
     ]
    }
   ],
   "source": [
    "# Grid layout, one string per row. Judging by the rendering printed by show():\n",
    "# '.' is a free cell, '#' a forbidden cell and 'T' the target.\n",
    "grid_desc = [\".....\",\".##..\",\"..#..\",\".#T#.\",\".#...\"]\n",
    "\n",
    "# Derive the grid size from the layout so the two can never get out of sync.\n",
    "rows = len(grid_desc)\n",
    "columns = len(grid_desc[0])\n",
    "gridworld = GridWorld_v2.GridWorld_v2(forbiddenAreaScore=-10, score=1, desc=grid_desc)\n",
    "\n",
    "gridworld.show()\n",
    "value = np.zeros(rows*columns)       # initial state values; any initialisation works, all zeros is fine\n",
    "qtable = np.zeros((rows*columns,5))  # per the original note only the shape matters here; the contents get overwritten"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "25906946-b972-4f9c-a97a-7671c77aa710",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T07:23:58.245381Z",
     "start_time": "2025-04-28T07:23:58.239338Z"
    }
   },
   "outputs": [],
   "source": [
    "def SARSA(gridworld: GridWorld_v2.GridWorld_v2, gamma=0.99, trajectorySteps=-1, learning_rate=0.001, final_epsilon=0.01, num_episodes=600) -> np.ndarray:\n",
    "    \"\"\"\n",
    "    Basic tabular SARSA with an epsilon-greedy behaviour policy.\n",
    "\n",
    "    Every episode samples one trajectory with gridworld.getTrajectoryScore,\n",
    "    starting from state 10 with a random first action, then replays it from\n",
    "    the last step to the first applying\n",
    "        Q(s,a) <- Q(s,a) - learning_rate * (Q(s,a) - (r + gamma * Q(s',a')))\n",
    "    and finally makes the policy greedy with respect to the updated Q-table.\n",
    "    Progress (visit counts, policy, state values) is printed every episode.\n",
    "\n",
    "    Reads the module-level `rows` and `columns` for the grid size.\n",
    "\n",
    "    Parameters:\n",
    "    gridworld (GridWorld_v2.GridWorld_v2): environment used to sample trajectories and to display the policy\n",
    "    gamma (float): discount factor applied to the next state-action value\n",
    "    trajectorySteps (int): trajectory length; -1 means keep walking until the target is reached\n",
    "    learning_rate (float): step size applied to the TD error\n",
    "    final_epsilon (float): lower bound of the exploration rate; epsilon starts at 0.5 and shrinks by 0.001 per episode until it reaches this value\n",
    "    num_episodes (int): number of episodes (policy-improvement iterations)\n",
    "\n",
    "    Returns:\n",
    "    np.ndarray: the learned action-value table of shape (rows * columns, 5)\n",
    "    \"\"\"\n",
    "    num_states = rows * columns\n",
    "    num_actions = 5  # NOTE(review): presumably four moves plus 'stay'; the encoding is defined by GridWorld_v2 - confirm\n",
    "    start_state = 10  # every episode starts from this state\n",
    "    epsilon_step = 0.001  # linear decay of the exploration rate per episode\n",
    "    # (state, action, reward, next_state, next_action) self-loop appended to every trajectory.\n",
    "    # The original author's note places the target (the check mark) at state 17; the extra\n",
    "    # transition makes sure the reward at the target itself gets learned.\n",
    "    # NOTE(review): reward 1 presumably mirrors score=1 used to build the grid world - confirm.\n",
    "    terminal_transition = (17, 4, 1, 17, 4)\n",
    "\n",
    "    action_value = np.zeros((num_states, num_actions))  # Q-table, all zeros\n",
    "    # Random deterministic initial policy, one-hot encoded per state.\n",
    "    policy = np.eye(num_actions)[np.random.randint(0, num_actions, size=num_states)]\n",
    "    epsilon = 0.5\n",
    "    # Only stop at the target when no fixed trajectory length was requested.\n",
    "    # NOTE(review): assumes getTrajectoryScore runs exactly `steps` steps when this flag is False - confirm.\n",
    "    stop_when_reach_target = trajectorySteps == -1\n",
    "\n",
    "    for episode in range(num_episodes):\n",
    "        print(\"episode\", f\"{episode}/{num_episodes}\")\n",
    "        # Decay epsilon but never let it drop below final_epsilon.\n",
    "        epsilon = max(final_epsilon, epsilon - epsilon_step)\n",
    "\n",
    "        # p1: probability of the greedy action, p0: probability of each other action.\n",
    "        p1 = 1 - epsilon * ((num_actions - 1) / num_actions)\n",
    "        p0 = epsilon / num_actions\n",
    "        print(\"p1\", p1, \"p0\", p0)\n",
    "        # Turn the one-hot policy into epsilon-greedy action probabilities.\n",
    "        policy_epsilon = np.where(policy == 1, p1, p0)\n",
    "\n",
    "        # How often each state is updated during this episode (diagnostics only).\n",
    "        visit_count = [0] * num_states\n",
    "\n",
    "        initAction = random.randint(0, num_actions - 1)\n",
    "        Trajectory = gridworld.getTrajectoryScore(nowState=start_state,\n",
    "                                                  action=initAction,\n",
    "                                                  policy=policy_epsilon,\n",
    "                                                  steps=trajectorySteps,\n",
    "                                                  stop_when_reach_target=stop_when_reach_target)\n",
    "        Trajectory.append(terminal_transition)\n",
    "        print(\"trajectorySteps\", len(Trajectory))\n",
    "\n",
    "        # Replay the trajectory backwards so the terminal reward is processed first.\n",
    "        for tmpstate, tmpaction, tmpscore, nextState, nextAction in reversed(Trajectory):\n",
    "            visit_count[tmpstate] += 1\n",
    "            TD_error = action_value[tmpstate][tmpaction] - (tmpscore + gamma * action_value[nextState][nextAction])\n",
    "            action_value[tmpstate][tmpaction] -= learning_rate * TD_error\n",
    "\n",
    "        # Policy improvement: greedy with respect to the updated Q-table.\n",
    "        policy = np.eye(num_actions)[np.argmax(action_value, axis=1)]\n",
    "        policy_epsilon = np.where(policy == 1, p1, p0)\n",
    "\n",
    "        print(np.array(visit_count).reshape(rows, columns))\n",
    "\n",
    "        # State value under the epsilon-greedy policy: expectation of Q over the action probabilities.\n",
    "        state_value = np.sum(policy_epsilon * action_value, axis=1)\n",
    "        mean_state_value = state_value.mean()\n",
    "\n",
    "        gridworld.showPolicy(policy)\n",
    "        print(np.round(state_value, decimals=4).reshape(rows, columns))\n",
    "        print(\"mean_state_value\", mean_state_value)\n",
    "\n",
    "    return action_value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "56f4a5d3-6bf7-427c-81a8-638bd18a97b9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T07:24:01.719736Z",
     "start_time": "2025-04-28T07:24:00.161653Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 0/600\n",
      "p1 0.6008 p0 0.0998\n",
      "trajectorySteps 28\n",
      "[[0 0 1 4 2]\n",
      " [0 0 1 2 2]\n",
      " [1 3 2 7 1]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬆️⬆️⬆️⬆️⬆️\n",
      "⬆️⏫️⏫️⬇️➡️\n",
      "⬆️⬆️⏬➡️➡️\n",
      "⬆️⏫️✅⏫️⬆️\n",
      "⬆️⏫️⬆️⬆️⬆️\n",
      "[[ 0.      0.     -0.001  -0.     -0.    ]\n",
      " [ 0.      0.     -0.001  -0.     -0.    ]\n",
      " [-0.     -0.001   0.0006 -0.     -0.    ]\n",
      " [ 0.      0.     -0.0004  0.      0.    ]\n",
      " [ 0.      0.      0.      0.      0.    ]]\n",
      "mean_state_value -0.0001119686794262584\n",
      "episode 1/600\n",
      "p1 0.6015999999999999 p0 0.0996\n",
      "trajectorySteps 301\n",
      "[[57 50 23 17 30]\n",
      " [13  9  2  7 39]\n",
      " [ 3  3  1  9 34]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️🔄➡️🔄🔄\n",
      "⬇️⏪⏪⬅️⬅️\n",
      "⬇️⬇️⏬⬆️⬇️\n",
      "⬆️⏫️✅⏩️➡️\n",
      "⬆️⏫️⬆️⬆️⬆️\n",
      "[[-0.0041 -0.0085 -0.0035 -0.001  -0.0016]\n",
      " [-0.0022 -0.002  -0.001  -0.     -0.0026]\n",
      " [-0.     -0.002   0.0012 -0.002  -0.0023]\n",
      " [ 0.      0.     -0.0008 -0.     -0.    ]\n",
      " [ 0.      0.      0.      0.      0.    ]]\n",
      "mean_state_value -0.0012931055122094902\n",
      "episode 2/600\n",
      "p1 0.6024 p0 0.0994\n",
      "trajectorySteps 106\n",
      "[[ 5  0  0  0  0]\n",
      " [12  2  0  0  0]\n",
      " [36  8  0  0  0]\n",
      " [32  6  2  0  0]\n",
      " [ 3  0  0  0  0]]\n",
      "⬇️🔄➡️🔄🔄\n",
      "⬇️⏬⏪⬅️⬅️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏩️➡️\n",
      "➡️⏫️⬆️⬆️⬆️\n",
      "[[-0.0043 -0.0085 -0.0035 -0.001  -0.0016]\n",
      " [-0.0036 -0.002  -0.001  -0.     -0.0026]\n",
      " [-0.0002 -0.007   0.0012 -0.002  -0.0023]\n",
      " [-0.0023  0.0006  0.0004 -0.     -0.    ]\n",
      " [-0.0001  0.      0.      0.      0.    ]]\n",
      "mean_state_value -0.0015846864161249213\n",
      "episode 3/600\n",
      "p1 0.6032 p0 0.0992\n",
      "trajectorySteps 52\n",
      "[[ 2  1  0  0  0]\n",
      " [15  1  0  0  0]\n",
      " [21  5  3  1  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️🔄➡️🔄🔄\n",
      "⬇️⏬⏪⬅️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏩️➡️\n",
      "➡️⏫️⬆️⬆️⬆️\n",
      "[[-0.0043 -0.0084 -0.0035 -0.001  -0.0016]\n",
      " [-0.0047 -0.002  -0.001  -0.     -0.0026]\n",
      " [-0.0004 -0.0089  0.0018 -0.003  -0.0023]\n",
      " [-0.0023  0.0006  0.0016 -0.     -0.    ]\n",
      " [-0.0001  0.      0.      0.      0.    ]]\n",
      "mean_state_value -0.0016793140492433755\n",
      "episode 4/600\n",
      "p1 0.604 p0 0.099\n",
      "trajectorySteps 15\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [8 1 0 0 0]\n",
      " [3 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️🔄➡️🔄🔄\n",
      "⬇️⏬⏪⬅️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏩️➡️\n",
      "➡️⏫️⬆️⬆️⬆️\n",
      "[[-0.0043 -0.0084 -0.0035 -0.001  -0.0016]\n",
      " [-0.0047 -0.002  -0.001  -0.     -0.0026]\n",
      " [-0.0004 -0.0089  0.0018 -0.003  -0.0023]\n",
      " [-0.0033  0.0012  0.0028 -0.     -0.    ]\n",
      " [-0.0001  0.      0.      0.      0.    ]]\n",
      "mean_state_value -0.0016421394493096142\n",
      "episode 5/600\n",
      "p1 0.6048 p0 0.0988\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 3 2 0 0]\n",
      " [2 4 0 0 0]]\n",
      "⬇️🔄➡️🔄🔄\n",
      "⬇️⏬⏪⬅️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏩️➡️\n",
      "⬅️⏩️⬆️⬆️⬆️\n",
      "[[-0.0043 -0.0084 -0.0035 -0.001  -0.0016]\n",
      " [-0.0047 -0.002  -0.001  -0.     -0.0026]\n",
      " [-0.0004 -0.0089  0.0018 -0.003  -0.0023]\n",
      " [-0.0033 -0.0002  0.0041 -0.     -0.    ]\n",
      " [-0.0021 -0.003   0.      0.      0.    ]]\n",
      "mean_state_value -0.0018421571389741684\n",
      "episode 6/600\n",
      "p1 0.6055999999999999 p0 0.0986\n",
      "trajectorySteps 132\n",
      "[[ 4  0  0  0  0]\n",
      " [13  1  0  0  0]\n",
      " [79  7  0  0  0]\n",
      " [22  1  2  0  0]\n",
      " [ 3  0  0  0  0]]\n",
      "➡️🔄➡️🔄🔄\n",
      "⬇️⏬⏪⬅️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0084 -0.0035 -0.001  -0.0016]\n",
      " [-0.0057 -0.002  -0.001  -0.     -0.0026]\n",
      " [-0.0008 -0.0099  0.0018 -0.003  -0.0023]\n",
      " [-0.0039  0.0005  0.0053 -0.     -0.    ]\n",
      " [-0.0023 -0.003   0.      0.      0.    ]]\n",
      "mean_state_value -0.001899447985358365\n",
      "episode 7/600\n",
      "p1 0.6064 p0 0.0984\n",
      "trajectorySteps 18\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [10  2  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️🔄➡️🔄🔄\n",
      "⬇️⏬⏪⬅️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0084 -0.0035 -0.001  -0.0016]\n",
      " [-0.0057 -0.002  -0.001  -0.     -0.0026]\n",
      " [-0.0008 -0.0108  0.0018 -0.003  -0.0023]\n",
      " [-0.0039  0.0011  0.0065 -0.     -0.    ]\n",
      " [-0.0023 -0.003   0.      0.      0.    ]]\n",
      "mean_state_value -0.0018643865384824494\n",
      "episode 8/600\n",
      "p1 0.6072 p0 0.0982\n",
      "trajectorySteps 138\n",
      "[[ 5 14  2 12  0]\n",
      " [14  0  0  0  0]\n",
      " [74  6  1  0  0]\n",
      " [ 8  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏬⏪⬅️⬅️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0086 -0.0034 -0.0012 -0.0016]\n",
      " [-0.0058 -0.002  -0.001  -0.     -0.0025]\n",
      " [-0.0017 -0.0118  0.0024 -0.003  -0.0023]\n",
      " [-0.0039  0.0011  0.0077 -0.     -0.    ]\n",
      " [-0.0023 -0.0029  0.      0.      0.    ]]\n",
      "mean_state_value -0.0018855630817210531\n",
      "episode 9/600\n",
      "p1 0.608 p0 0.098\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [5 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏬⏪⬅️⬅️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0085 -0.0034 -0.0012 -0.0016]\n",
      " [-0.0058 -0.002  -0.001  -0.     -0.0025]\n",
      " [-0.0018 -0.0118  0.0024 -0.0029 -0.0023]\n",
      " [-0.0049  0.0017  0.0074 -0.     -0.    ]\n",
      " [-0.0023 -0.0029  0.      0.      0.    ]]\n",
      "mean_state_value -0.001914301547946607\n",
      "episode 10/600\n",
      "p1 0.6088 p0 0.0978\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [3 2 0 0 0]\n",
      " [4 2 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪⏪⬅️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0085 -0.0034 -0.0012 -0.0016]\n",
      " [-0.0077 -0.002  -0.001  -0.     -0.0025]\n",
      " [-0.0018 -0.0127  0.003  -0.0029 -0.0023]\n",
      " [-0.0049  0.0017  0.0086 -0.     -0.    ]\n",
      " [-0.0023 -0.0029  0.      0.      0.    ]]\n",
      "mean_state_value -0.00195279103518526\n",
      "episode 11/600\n",
      "p1 0.6095999999999999 p0 0.09759999999999999\n",
      "trajectorySteps 77\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [28  5  0  0  0]\n",
      " [ 5  1  2  0  0]\n",
      " [31  1  2  0  0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪⏪⬅️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0085 -0.0034 -0.0012 -0.0016]\n",
      " [-0.0077 -0.002  -0.001  -0.     -0.0025]\n",
      " [-0.0018 -0.0127  0.003  -0.0029 -0.0022]\n",
      " [-0.0059  0.0017  0.0098 -0.     -0.    ]\n",
      " [-0.0038 -0.0029  0.0006  0.      0.    ]]\n",
      "mean_state_value -0.001976226853171824\n",
      "episode 12/600\n",
      "p1 0.6104 p0 0.0974\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [3 0 2 0 0]\n",
      " [6 1 1 0 0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪⏪⬅️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏩️➡️\n",
      "⬆️⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0085 -0.0034 -0.0012 -0.0016]\n",
      " [-0.0077 -0.002  -0.001  -0.     -0.0025]\n",
      " [-0.0018 -0.0126  0.0031 -0.0029 -0.0022]\n",
      " [-0.0059  0.0017  0.0111 -0.     -0.    ]\n",
      " [-0.0048 -0.0029  0.0012  0.      0.    ]]\n",
      "mean_state_value -0.0019355356070330598\n",
      "episode 13/600\n",
      "p1 0.6112 p0 0.0972\n",
      "trajectorySteps 19\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [9 1 2 0 0]\n",
      " [6 0 0 0 0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪⏪⬅️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0085 -0.0034 -0.0012 -0.0016]\n",
      " [-0.0077 -0.002  -0.001  -0.     -0.0025]\n",
      " [-0.0018 -0.0126  0.0031 -0.0029 -0.0022]\n",
      " [-0.0068  0.0023  0.0123 -0.     -0.    ]\n",
      " [-0.0048 -0.0029  0.0012  0.      0.    ]]\n",
      "mean_state_value -0.0018942325215564127\n",
      "episode 14/600\n",
      "p1 0.612 p0 0.097\n",
      "trajectorySteps 80\n",
      "[[ 1  1  1  1  0]\n",
      " [ 2  2  2  1  0]\n",
      " [ 9  1  0  0  0]\n",
      " [ 8  2  2  0  0]\n",
      " [45  1  1  0  0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪🔄➡️⬅️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0085 -0.0034 -0.0012 -0.0016]\n",
      " [-0.0077 -0.0029 -0.0029 -0.001  -0.0025]\n",
      " [-0.0019 -0.0136  0.0031 -0.0029 -0.0022]\n",
      " [-0.0079  0.0024  0.0136 -0.     -0.    ]\n",
      " [-0.0066 -0.0029  0.0019  0.      0.    ]]\n",
      "mean_state_value -0.0021258708580449543\n",
      "episode 15/600\n",
      "p1 0.6128 p0 0.0968\n",
      "trajectorySteps 51\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [36  6  1  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪🔄➡️⬅️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0084 -0.0034 -0.0012 -0.0016]\n",
      " [-0.0078 -0.0029 -0.0029 -0.001  -0.0025]\n",
      " [-0.0023 -0.0145  0.0037 -0.0029 -0.0022]\n",
      " [-0.0078  0.0024  0.0132 -0.     -0.    ]\n",
      " [-0.0066 -0.0029  0.0019  0.      0.    ]]\n",
      "mean_state_value -0.0021666554670989303\n",
      "episode 16/600\n",
      "p1 0.6135999999999999 p0 0.09659999999999999\n",
      "trajectorySteps 27\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 5  1  2  0  0]\n",
      " [14  1  1  0  0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪🔄➡️⬅️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏩️➡️\n",
      "🔄⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0084 -0.0034 -0.0012 -0.0015]\n",
      " [-0.0077 -0.0029 -0.0029 -0.001  -0.0025]\n",
      " [-0.0023 -0.0145  0.0037 -0.0029 -0.0022]\n",
      " [-0.0089  0.0014  0.0129 -0.     -0.    ]\n",
      " [-0.0069 -0.0029  0.0025  0.      0.    ]]\n",
      "mean_state_value -0.0022421103274230334\n",
      "episode 17/600\n",
      "p1 0.6144000000000001 p0 0.0964\n",
      "trajectorySteps 53\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [ 5  2  2  0  0]\n",
      " [36  2  0  0  0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪🔄➡️⬅️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏩️➡️\n",
      "⬆️⏩️⬆️⬆️⬆️\n",
      "[[-0.0044 -0.0084 -0.0034 -0.0012 -0.0015]\n",
      " [-0.0077 -0.0029 -0.0029 -0.001  -0.0025]\n",
      " [-0.0023 -0.0144  0.0037 -0.0029 -0.0022]\n",
      " [-0.009   0.0011  0.0142 -0.     -0.    ]\n",
      " [-0.0083 -0.004   0.0025  0.      0.    ]]\n",
      "mean_state_value -0.0023008271076852952\n",
      "episode 18/600\n",
      "p1 0.6152 p0 0.0962\n",
      "trajectorySteps 32\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  1  0  0  0]\n",
      " [ 1  0  2  3 15]\n",
      " [ 1  2  3  1  1]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪🔄➡️⬅️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️➡️➡️\n",
      "[[-0.0044 -0.0084 -0.0034 -0.0012 -0.0015]\n",
      " [-0.0077 -0.0029 -0.0029 -0.001  -0.0025]\n",
      " [-0.0023 -0.0144  0.0037 -0.0029 -0.0022]\n",
      " [-0.009   0.0011  0.0154  0.0006 -0.003 ]\n",
      " [-0.0092 -0.004   0.0014 -0.001  -0.    ]]\n",
      "mean_state_value -0.0024580880299923196\n",
      "episode 19/600\n",
      "p1 0.616 p0 0.096\n",
      "trajectorySteps 86\n",
      "[[ 0  1  2  2 13]\n",
      " [ 3  3  5 14 15]\n",
      " [ 9  1  0  3  0]\n",
      " [ 5  1  2  2  3]\n",
      " [ 0  0  0  1  1]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏪⏩️⬆️🔄\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬇️➡️\n",
      "[[-0.0043 -0.0084 -0.0034 -0.0012 -0.0016]\n",
      " [-0.0087 -0.0039 -0.0067 -0.0029 -0.0027]\n",
      " [-0.0024 -0.0144  0.0037 -0.0038 -0.0022]\n",
      " [-0.0099  0.0018  0.0166  0.0006 -0.0039]\n",
      " [-0.0092 -0.0039  0.0014 -0.001  -0.    ]]\n",
      "mean_state_value -0.002817413027883026\n",
      "episode 20/600\n",
      "p1 0.6168 p0 0.0958\n",
      "trajectorySteps 33\n",
      "[[0 2 2 4 0]\n",
      " [2 2 0 1 0]\n",
      " [7 1 0 8 0]\n",
      " [1 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️🔄⬇️🔄\n",
      "⬇️⏪⏩️⬆️🔄\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬇️➡️\n",
      "[[-0.0043 -0.0093 -0.0034 -0.0013 -0.0016]\n",
      " [-0.0097 -0.0039 -0.0067 -0.0029 -0.0027]\n",
      " [-0.0024 -0.0144  0.0037 -0.0048 -0.0022]\n",
      " [-0.0099  0.0018  0.0179  0.0013 -0.0039]\n",
      " [-0.0092 -0.0039  0.0014 -0.001  -0.    ]]\n",
      "mean_state_value -0.0028567670109150047\n",
      "episode 21/600\n",
      "p1 0.6175999999999999 p0 0.09559999999999999\n",
      "trajectorySteps 71\n",
      "[[ 0  0  0  0  0]\n",
      " [ 7  3  0  0  0]\n",
      " [41  8  0  0  0]\n",
      " [ 8  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️➡️🔄⬇️🔄\n",
      "⬇️⏪⏩️⬆️🔄\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬇️➡️\n",
      "[[-0.0043 -0.0093 -0.0034 -0.0013 -0.0016]\n",
      " [-0.0117 -0.0039 -0.0067 -0.0029 -0.0027]\n",
      " [-0.0024 -0.0153  0.0037 -0.0048 -0.0022]\n",
      " [-0.0108  0.0024  0.0192  0.0013 -0.0039]\n",
      " [-0.0092 -0.0039  0.0014 -0.001  -0.    ]]\n",
      "mean_state_value -0.0029283940543505978\n",
      "episode 22/600\n",
      "p1 0.6184000000000001 p0 0.0954\n",
      "trajectorySteps 61\n",
      "[[ 0  0  0  0  0]\n",
      " [ 6  3  1  0  0]\n",
      " [32  5  0  0  0]\n",
      " [ 8  2  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "➡️➡️🔄⬇️🔄\n",
      "⬇️⏪⏩️⬆️🔄\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬇️➡️\n",
      "[[-0.0043 -0.0093 -0.0034 -0.0013 -0.0016]\n",
      " [-0.0136 -0.0048 -0.0076 -0.0029 -0.0027]\n",
      " [-0.0029 -0.0172  0.0037 -0.0048 -0.0022]\n",
      " [-0.0108  0.0031  0.0204  0.0013 -0.0039]\n",
      " [-0.0091 -0.0039  0.0014 -0.001  -0.    ]]\n",
      "mean_state_value -0.0030937654872985484\n",
      "episode 23/600\n",
      "p1 0.6192 p0 0.09519999999999999\n",
      "trajectorySteps 16\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [10  0  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️🔄⬇️🔄\n",
      "⬇️⏪⏩️⬆️🔄\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬇️➡️\n",
      "[[-0.0043 -0.0092 -0.0034 -0.0013 -0.0016]\n",
      " [-0.0136 -0.0048 -0.0076 -0.0029 -0.0027]\n",
      " [-0.0032 -0.0171  0.0037 -0.0048 -0.0022]\n",
      " [-0.0117  0.0037  0.0217  0.0013 -0.0039]\n",
      " [-0.0091 -0.0039  0.0014 -0.001  -0.    ]]\n",
      "mean_state_value -0.0030575363830229758\n",
      "episode 24/600\n",
      "p1 0.62 p0 0.095\n",
      "trajectorySteps 66\n",
      "[[ 0  0  0  7 14]\n",
      " [ 1  1  3  5  3]\n",
      " [ 2  0  0  0  1]\n",
      " [ 0  0  2  0  3]\n",
      " [ 0  0  1  2 21]]\n",
      "➡️➡️🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0092 -0.0033 -0.0014 -0.0016]\n",
      " [-0.0145 -0.0057 -0.0085 -0.0038 -0.0027]\n",
      " [-0.0032 -0.0171  0.0037 -0.0048 -0.0022]\n",
      " [-0.0117  0.0037  0.0214  0.0013 -0.0039]\n",
      " [-0.0091 -0.0039  0.002  -0.001  -0.0015]]\n",
      "mean_state_value -0.00325563476860235\n",
      "episode 25/600\n",
      "p1 0.6208 p0 0.0948\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0092 -0.0033 -0.0014 -0.0016]\n",
      " [-0.0145 -0.0057 -0.0085 -0.0038 -0.0026]\n",
      " [-0.0033 -0.017   0.0037 -0.0048 -0.0022]\n",
      " [-0.0126  0.0044  0.0211  0.0013 -0.0039]\n",
      " [-0.0091 -0.0039  0.0021 -0.001  -0.0015]]\n",
      "mean_state_value -0.003273610556348164\n",
      "episode 26/600\n",
      "p1 0.6215999999999999 p0 0.09459999999999999\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [1 2 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0092 -0.0033 -0.0014 -0.0016]\n",
      " [-0.0145 -0.0057 -0.0085 -0.0038 -0.0026]\n",
      " [-0.0034 -0.017   0.0037 -0.0047 -0.0022]\n",
      " [-0.0135  0.004   0.0208  0.0013 -0.0039]\n",
      " [-0.0091 -0.0039  0.0021 -0.001  -0.0015]]\n",
      "mean_state_value -0.0033296742808619994\n",
      "episode 27/600\n",
      "p1 0.6224000000000001 p0 0.0944\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [0 2 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0092 -0.0033 -0.0014 -0.0016]\n",
      " [-0.0144 -0.0057 -0.0085 -0.0038 -0.0026]\n",
      " [-0.0034 -0.0179  0.0037 -0.0047 -0.0022]\n",
      " [-0.0135  0.0037  0.0221  0.0013 -0.0039]\n",
      " [-0.009  -0.0039  0.0021 -0.001  -0.0015]]\n",
      "mean_state_value -0.00331798819268115\n",
      "episode 28/600\n",
      "p1 0.6232 p0 0.09419999999999999\n",
      "trajectorySteps 42\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [20  3  2  0  0]\n",
      " [10  0  2  0  0]\n",
      " [ 4  0  0  0  0]]\n",
      "➡️➡️🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0092 -0.0033 -0.0014 -0.0016]\n",
      " [-0.0144 -0.0057 -0.0085 -0.0038 -0.0026]\n",
      " [-0.0035 -0.0197  0.0044 -0.0047 -0.0022]\n",
      " [-0.0136  0.0038  0.0234  0.0013 -0.0039]\n",
      " [-0.0091 -0.0039  0.0021 -0.001  -0.0015]]\n",
      "mean_state_value -0.003320010804297099\n",
      "episode 29/600\n",
      "p1 0.624 p0 0.094\n",
      "trajectorySteps 56\n",
      "[[ 1  1  0  0  0]\n",
      " [10  2  0  0  0]\n",
      " [31  2  1  0  0]\n",
      " [ 5  0  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "⬇️➡️🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0101 -0.0033 -0.0014 -0.0016]\n",
      " [-0.0154 -0.0057 -0.0085 -0.0038 -0.0026]\n",
      " [-0.004  -0.0206  0.005  -0.0047 -0.0022]\n",
      " [-0.0136  0.0038  0.0232  0.0013 -0.0039]\n",
      " [-0.0091 -0.0039  0.0021 -0.001  -0.0015]]\n",
      "mean_state_value -0.003434197838379423\n",
      "episode 30/600\n",
      "p1 0.6248 p0 0.0938\n",
      "trajectorySteps 29\n",
      "[[ 1  0  0  0  0]\n",
      " [ 5  1  0  0  0]\n",
      " [12  2  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [ 0  1  1  0  0]]\n",
      "⬇️➡️🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.01   -0.0033 -0.0014 -0.0016]\n",
      " [-0.0163 -0.0057 -0.0084 -0.0038 -0.0026]\n",
      " [-0.0041 -0.0206  0.005  -0.0047 -0.0022]\n",
      " [-0.0146  0.0029  0.0229  0.0013 -0.0038]\n",
      " [-0.0091 -0.0038  0.0027 -0.001  -0.0015]]\n",
      "mean_state_value -0.003529283359090468\n",
      "episode 31/600\n",
      "p1 0.6255999999999999 p0 0.09359999999999999\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [8 2 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.01   -0.0033 -0.0014 -0.0016]\n",
      " [-0.0163 -0.0057 -0.0084 -0.0038 -0.0026]\n",
      " [-0.0042 -0.0215  0.005  -0.0047 -0.0022]\n",
      " [-0.0146  0.0036  0.0242  0.0013 -0.0038]\n",
      " [-0.0091 -0.0038  0.0027 -0.001  -0.0015]]\n",
      "mean_state_value -0.003479837487421829\n",
      "episode 32/600\n",
      "p1 0.6264000000000001 p0 0.0934\n",
      "trajectorySteps 52\n",
      "[[ 3  4  1  0  0]\n",
      " [ 8  2  1  0  0]\n",
      " [20  4  0  0  0]\n",
      " [ 5  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄🔄🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0102 -0.0042 -0.0014 -0.0016]\n",
      " [-0.0173 -0.0057 -0.0093 -0.0037 -0.0026]\n",
      " [-0.0043 -0.0224  0.005  -0.0047 -0.0022]\n",
      " [-0.0158  0.0042  0.024   0.0013 -0.0038]\n",
      " [-0.009  -0.0038  0.0027 -0.001  -0.0015]]\n",
      "mean_state_value -0.0036681206074623764\n",
      "episode 33/600\n",
      "p1 0.6272 p0 0.09319999999999999\n",
      "trajectorySteps 30\n",
      "[[ 0  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [17  3  0  0  0]\n",
      " [ 2  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄🔄🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0102 -0.0042 -0.0014 -0.0016]\n",
      " [-0.0172 -0.0056 -0.0093 -0.0037 -0.0026]\n",
      " [-0.0047 -0.0232  0.005  -0.0047 -0.0021]\n",
      " [-0.0157  0.0039  0.0237  0.0013 -0.0038]\n",
      " [-0.009  -0.0038  0.0027 -0.001  -0.0015]]\n",
      "mean_state_value -0.0037297567493527802\n",
      "episode 34/600\n",
      "p1 0.628 p0 0.093\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄🔄🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0102 -0.0042 -0.0014 -0.0016]\n",
      " [-0.0172 -0.0056 -0.0093 -0.0037 -0.0026]\n",
      " [-0.0047 -0.0232  0.005  -0.0047 -0.0021]\n",
      " [-0.0166  0.0046  0.0251  0.0013 -0.0038]\n",
      " [-0.009  -0.0038  0.0027 -0.001  -0.0015]]\n",
      "mean_state_value -0.0036750287087763734\n",
      "episode 35/600\n",
      "p1 0.6288 p0 0.0928\n",
      "trajectorySteps 62\n",
      "[[ 6  0  0  0  0]\n",
      " [ 8  1  0  0  0]\n",
      " [40  3  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄🔄🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0101 -0.0042 -0.0014 -0.0016]\n",
      " [-0.0173 -0.0056 -0.0093 -0.0037 -0.0026]\n",
      " [-0.0048 -0.025   0.005  -0.0047 -0.0021]\n",
      " [-0.0166  0.0052  0.0248  0.0013 -0.0038]\n",
      " [-0.009  -0.0038  0.0027 -0.001  -0.0015]]\n",
      "mean_state_value -0.0037337977189682557\n",
      "episode 36/600\n",
      "p1 0.6295999999999999 p0 0.09259999999999999\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [3 1 0 0 0]\n",
      " [7 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄🔄🔄⬅️🔄\n",
      "⬇️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.0101 -0.0042 -0.0014 -0.0016]\n",
      " [-0.0181 -0.0056 -0.0093 -0.0037 -0.0026]\n",
      " [-0.005  -0.0249  0.005  -0.0046 -0.0021]\n",
      " [-0.0176  0.0059  0.0262  0.0013 -0.0038]\n",
      " [-0.009  -0.0038  0.0027 -0.001  -0.0015]]\n",
      "mean_state_value -0.0037262227123154358\n",
      "episode 37/600\n",
      "p1 0.6304000000000001 p0 0.0924\n",
      "trajectorySteps 121\n",
      "[[ 4  5  0  0  0]\n",
      " [ 9  3  0  0  0]\n",
      " [75  2  0  0  0]\n",
      " [19  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️🔄⬅️🔄\n",
      "⬇️⏬⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "🔄⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.011  -0.0042 -0.0014 -0.0016]\n",
      " [-0.0183 -0.0074 -0.0092 -0.0037 -0.0026]\n",
      " [-0.0059 -0.0249  0.005  -0.0046 -0.0021]\n",
      " [-0.0194  0.0066  0.0275  0.0013 -0.0038]\n",
      " [-0.0089 -0.0038  0.0027 -0.001  -0.0015]]\n",
      "mean_state_value -0.0038651881225173494\n",
      "episode 38/600\n",
      "p1 0.6312 p0 0.09219999999999999\n",
      "trajectorySteps 73\n",
      "[[ 0  0  0  0  0]\n",
      " [10  1  0  0  0]\n",
      " [46  4  0  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️🔄⬅️🔄\n",
      "⬆️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "🔄⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.011  -0.0042 -0.0014 -0.0016]\n",
      " [-0.0184 -0.0074 -0.0092 -0.0037 -0.0026]\n",
      " [-0.0064 -0.0267  0.005  -0.0046 -0.0021]\n",
      " [-0.0195  0.0072  0.0273  0.0013 -0.0038]\n",
      " [-0.0089 -0.0038  0.0027 -0.001  -0.0015]]\n",
      "mean_state_value -0.0039430425060034404\n",
      "episode 39/600\n",
      "p1 0.632 p0 0.092\n",
      "trajectorySteps 26\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 9  1  0  0  0]\n",
      " [12  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "🔄➡️🔄⬅️🔄\n",
      "⬆️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0043 -0.011  -0.0042 -0.0014 -0.0016]\n",
      " [-0.0184 -0.0074 -0.0092 -0.0037 -0.0026]\n",
      " [-0.0067 -0.0266  0.005  -0.0046 -0.0021]\n",
      " [-0.0205  0.0079  0.0286  0.0013 -0.0038]\n",
      " [-0.0089 -0.0038  0.0028 -0.001  -0.0015]]\n",
      "mean_state_value -0.0039026426423319676\n",
      "episode 40/600\n",
      "p1 0.6328 p0 0.09179999999999999\n",
      "trajectorySteps 95\n",
      "[[24  1  9  3  0]\n",
      " [12  3  1 15  0]\n",
      " [15  3  0  2  0]\n",
      " [ 0  0  2  1  0]\n",
      " [ 0  1  2  1  0]]\n",
      "➡️➡️⬅️⬅️🔄\n",
      "⬆️⏫️⏩️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️🔄⬅️\n",
      "[[-0.0045 -0.011  -0.0051 -0.0015 -0.0016]\n",
      " [-0.0213 -0.0075 -0.0092 -0.0037 -0.0026]\n",
      " [-0.0068 -0.0265  0.005  -0.0055 -0.0021]\n",
      " [-0.0205  0.0079  0.0299  0.0013 -0.0038]\n",
      " [-0.0089 -0.0038  0.0025 -0.001  -0.0015]]\n",
      "mean_state_value -0.0040568806377790645\n",
      "episode 41/600\n",
      "p1 0.6335999999999999 p0 0.09159999999999999\n",
      "trajectorySteps 16\n",
      "[[1 3 3 0 0]\n",
      " [1 0 2 1 0]\n",
      " [1 0 2 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️⬅️⬅️🔄\n",
      "⬆️⏫️⏫️🔄🔄\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️🔄⬅️\n",
      "[[-0.0045 -0.011  -0.006  -0.0015 -0.0016]\n",
      " [-0.0212 -0.0075 -0.0101 -0.0046 -0.0026]\n",
      " [-0.0068 -0.0265  0.0048 -0.0055 -0.0021]\n",
      " [-0.0204  0.008   0.0312  0.0013 -0.0038]\n",
      " [-0.0089 -0.0038  0.0025 -0.001  -0.0015]]\n",
      "mean_state_value -0.00411021564784954\n",
      "episode 42/600\n",
      "p1 0.6344000000000001 p0 0.0914\n",
      "trajectorySteps 265\n",
      "[[93 98 11  1  0]\n",
      " [ 9 13  1  0  0]\n",
      " [19  2  0  0  0]\n",
      " [ 7  1  2  0  0]\n",
      " [ 5  3  0  0  0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬆️⏬⏫️🔄🔄\n",
      "⬇️⬅️⏬⬆️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️🔄⬅️\n",
      "[[-0.0056 -0.0204 -0.006  -0.0015 -0.0016]\n",
      " [-0.0213 -0.0102 -0.0101 -0.0046 -0.0026]\n",
      " [-0.0069 -0.0273  0.0048 -0.0055 -0.0021]\n",
      " [-0.0206  0.0087  0.0325  0.0013 -0.0038]\n",
      " [-0.0099 -0.0065  0.0025 -0.001  -0.0015]]\n",
      "mean_state_value -0.004754444835729937\n",
      "episode 43/600\n",
      "p1 0.6352 p0 0.09119999999999999\n",
      "trajectorySteps 19\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 1 0 0 0]\n",
      " [1 0 2 0 0]\n",
      " [9 2 1 0 0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬆️⏬⏫️🔄🔄\n",
      "⬇️⬅️⏬⬆️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️🔄⬅️\n",
      "[[-0.0056 -0.0203 -0.006  -0.0015 -0.0016]\n",
      " [-0.0212 -0.0102 -0.01   -0.0046 -0.0025]\n",
      " [-0.007  -0.0273  0.0048 -0.0055 -0.0021]\n",
      " [-0.0206  0.0087  0.0323  0.0013 -0.0037]\n",
      " [-0.011  -0.0066  0.0031 -0.001  -0.0015]]\n",
      "mean_state_value -0.004777799112825532\n",
      "episode 44/600\n",
      "p1 0.636 p0 0.091\n",
      "trajectorySteps 144\n",
      "[[23  2  3  7 29]\n",
      " [ 7  1  0  5 43]\n",
      " [ 5  3  2  3  3]\n",
      " [ 2  0  2  0  0]\n",
      " [ 2  1  1  0  0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬇️⏬⏫️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️🔄⬅️\n",
      "[[-0.0058 -0.0204 -0.0061 -0.0017 -0.0018]\n",
      " [-0.0222 -0.0102 -0.01   -0.0046 -0.0032]\n",
      " [-0.007  -0.0273  0.0039 -0.0064 -0.0021]\n",
      " [-0.0205  0.0087  0.0321  0.0013 -0.0037]\n",
      " [-0.012  -0.0065  0.0038 -0.001  -0.0015]]\n",
      "mean_state_value -0.004957111983398589\n",
      "episode 45/600\n",
      "p1 0.6368 p0 0.09079999999999999\n",
      "trajectorySteps 20\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [7 1 2 0 0]\n",
      " [7 0 0 0 0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬇️⏬⏫️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️🔄⬅️\n",
      "[[-0.0057 -0.0203 -0.006  -0.0016 -0.0018]\n",
      " [-0.0221 -0.0102 -0.01   -0.0046 -0.0032]\n",
      " [-0.0069 -0.0272  0.0039 -0.0064 -0.0021]\n",
      " [-0.0215  0.0093  0.032   0.0013 -0.0037]\n",
      " [-0.0121 -0.0065  0.0038 -0.001  -0.0015]]\n",
      "mean_state_value -0.00496540525325382\n",
      "episode 46/600\n",
      "p1 0.6376 p0 0.09059999999999999\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 1 0 0 0]\n",
      " [3 1 2 0 0]\n",
      " [2 0 0 0 0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬇️⏬⏫️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️🔄⬅️\n",
      "[[-0.0057 -0.0203 -0.006  -0.0016 -0.0018]\n",
      " [-0.0221 -0.0101 -0.01   -0.0045 -0.0032]\n",
      " [-0.0069 -0.0271  0.0039 -0.0064 -0.0021]\n",
      " [-0.0224  0.0099  0.0318  0.0013 -0.0037]\n",
      " [-0.012  -0.0065  0.0038 -0.001  -0.0015]]\n",
      "mean_state_value -0.004972068997859192\n",
      "episode 47/600\n",
      "p1 0.6384000000000001 p0 0.0904\n",
      "trajectorySteps 27\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 6  0  2  0  0]\n",
      " [13  1  3  1  0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬇️⏬⏫️🔄⬆️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️🔄⬅️\n",
      "[[-0.0057 -0.0203 -0.006  -0.0016 -0.0018]\n",
      " [-0.022  -0.0101 -0.0099 -0.0045 -0.0032]\n",
      " [-0.0069 -0.0271  0.0039 -0.0063 -0.0021]\n",
      " [-0.0224  0.01    0.0325  0.0013 -0.0037]\n",
      " [-0.0133 -0.0065  0.0044 -0.001  -0.0014]]\n",
      "mean_state_value -0.004954804932504706\n",
      "episode 48/600\n",
      "p1 0.6392 p0 0.09019999999999999\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [4 1 2 0 0]\n",
      " [5 0 0 0 0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬇️⏬⏫️🔄⬆️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️🔄⬅️\n",
      "[[-0.0057 -0.0202 -0.006  -0.0016 -0.0018]\n",
      " [-0.022  -0.0101 -0.0099 -0.0045 -0.0032]\n",
      " [-0.0069 -0.027   0.004  -0.0063 -0.0021]\n",
      " [-0.0234  0.0107  0.0339  0.0013 -0.0037]\n",
      " [-0.0135 -0.0065  0.0044 -0.001  -0.0014]]\n",
      "mean_state_value -0.004904336935258966\n",
      "episode 49/600\n",
      "p1 0.64 p0 0.09\n",
      "trajectorySteps 16\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [10  2  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬆️⏬⏫️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️🔄⬅️\n",
      "[[-0.0057 -0.0202 -0.006  -0.0016 -0.0018]\n",
      " [-0.022  -0.0101 -0.0099 -0.0045 -0.0031]\n",
      " [-0.0072 -0.0278  0.004  -0.0063 -0.0021]\n",
      " [-0.0233  0.0114  0.0351  0.0013 -0.0037]\n",
      " [-0.0134 -0.0065  0.0044 -0.001  -0.0014]]\n",
      "mean_state_value -0.0048554822907580265\n",
      "episode 50/600\n",
      "p1 0.6408 p0 0.08979999999999999\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [18  3  1  0  0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬆️⏬⏫️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️🔄⬅️\n",
      "[[-0.0057 -0.0201 -0.006  -0.0016 -0.0018]\n",
      " [-0.0219 -0.01   -0.0099 -0.0045 -0.0031]\n",
      " [-0.0073 -0.0278  0.004  -0.0063 -0.0021]\n",
      " [-0.0233  0.0114  0.035   0.0013 -0.0037]\n",
      " [-0.0163 -0.0064  0.005  -0.001  -0.0014]]\n",
      "mean_state_value -0.004941517421290629\n",
      "episode 51/600\n",
      "p1 0.6416 p0 0.08959999999999999\n",
      "trajectorySteps 22\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [ 4  1  2  0  0]\n",
      " [11  0  0  0  0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬆️⏬⏫️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️🔄⬅️\n",
      "[[-0.0057 -0.0201 -0.006  -0.0016 -0.0018]\n",
      " [-0.0219 -0.01   -0.0099 -0.0045 -0.0031]\n",
      " [-0.0073 -0.0277  0.004  -0.0063 -0.0021]\n",
      " [-0.0241  0.0121  0.0357  0.0013 -0.0037]\n",
      " [-0.0164 -0.0064  0.0051 -0.001  -0.0014]]\n",
      "mean_state_value -0.0049114621993129725\n",
      "episode 52/600\n",
      "p1 0.6424000000000001 p0 0.0894\n",
      "trajectorySteps 51\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [19  2  0  0  0]\n",
      " [17  0  2  0  0]\n",
      " [ 6  3  2  0  0]]\n",
      "🔄➡️➡️⬅️🔄\n",
      "⬆️⏬⏫️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️🔄⬅️\n",
      "[[-0.0057 -0.02   -0.0059 -0.0016 -0.0018]\n",
      " [-0.0218 -0.01   -0.0098 -0.0045 -0.0031]\n",
      " [-0.0077 -0.0277  0.004  -0.0063 -0.0021]\n",
      " [-0.0244  0.0121  0.0371  0.0013 -0.0037]\n",
      " [-0.0174 -0.0082  0.0058 -0.001  -0.0014]]\n",
      "mean_state_value -0.0049498137260637586\n",
      "episode 53/600\n",
      "p1 0.6432 p0 0.08919999999999999\n",
      "trajectorySteps 200\n",
      "[[53  8 40 32  7]\n",
      " [ 8  1  7  7  3]\n",
      " [ 2  1  1  3 13]\n",
      " [ 0  0  2  1  8]\n",
      " [ 0  0  0  2  1]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬆️⏬⏫️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️⬅️\n",
      "[[-0.0063 -0.0209 -0.0089 -0.0017 -0.0019]\n",
      " [-0.0219 -0.0109 -0.0116 -0.0054 -0.0032]\n",
      " [-0.0077 -0.0276  0.0047 -0.0071 -0.0022]\n",
      " [-0.0244  0.0122  0.0385  0.0013 -0.0037]\n",
      " [-0.0173 -0.0082  0.0058 -0.0019 -0.0014]]\n",
      "mean_state_value -0.005264422268832986\n",
      "episode 54/600\n",
      "p1 0.644 p0 0.089\n",
      "trajectorySteps 237\n",
      "[[39  1  8 59 57]\n",
      " [ 6  0  7 17  6]\n",
      " [13  1  0  2  2]\n",
      " [ 8  0  2  1  1]\n",
      " [ 0  0  0  4  3]]\n",
      "🔄➡️➡️🔄🔄\n",
      "⬇️⏬⏫️⬆️⬆️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.007  -0.0209 -0.0098 -0.0024 -0.0028]\n",
      " [-0.0218 -0.0108 -0.0142 -0.008  -0.0033]\n",
      " [-0.0078 -0.0275  0.0047 -0.0071 -0.0021]\n",
      " [-0.0245  0.0122  0.0399  0.002  -0.0037]\n",
      " [-0.0173 -0.0082  0.0058 -0.0028 -0.0014]]\n",
      "mean_state_value -0.005556079514433344\n",
      "episode 55/600\n",
      "p1 0.6448 p0 0.08879999999999999\n",
      "trajectorySteps 272\n",
      "[[ 10   7  16 109  96]\n",
      " [  0   0   6  10   8]\n",
      " [  1   2   3   1   0]\n",
      " [  0   1   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏬⏩️➡️⬅️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.0071 -0.0208 -0.0125 -0.0034 -0.0043]\n",
      " [-0.0218 -0.0108 -0.0151 -0.0089 -0.0034]\n",
      " [-0.0078 -0.0292  0.0037 -0.008  -0.0021]\n",
      " [-0.0245  0.0123  0.0412  0.002  -0.0036]\n",
      " [-0.0173 -0.0081  0.0058 -0.0028 -0.0014]]\n",
      "mean_state_value -0.00592633115779189\n",
      "episode 56/600\n",
      "p1 0.6456 p0 0.08859999999999998\n",
      "trajectorySteps 24\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [4 0 2 0 0]\n",
      " [9 1 3 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏬⏩️➡️⬅️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.0071 -0.0208 -0.0125 -0.0034 -0.0043]\n",
      " [-0.0217 -0.0108 -0.0151 -0.0089 -0.0034]\n",
      " [-0.0078 -0.0292  0.0037 -0.008  -0.0021]\n",
      " [-0.0244  0.0123  0.0426  0.002  -0.0036]\n",
      " [-0.0184 -0.0081  0.0065 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005867608595656926\n",
      "episode 57/600\n",
      "p1 0.6464000000000001 p0 0.08839999999999999\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [3 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏬⏩️➡️⬅️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.0071 -0.0207 -0.0124 -0.0034 -0.0043]\n",
      " [-0.0217 -0.0108 -0.015  -0.0089 -0.0034]\n",
      " [-0.0078 -0.0291  0.0037 -0.008  -0.0021]\n",
      " [-0.0252  0.0129  0.0425  0.002  -0.0036]\n",
      " [-0.0184 -0.0081  0.0065 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005863106793134476\n",
      "episode 58/600\n",
      "p1 0.6472 p0 0.08819999999999999\n",
      "trajectorySteps 24\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [10  0  0  0  0]\n",
      " [ 8  0  2  0  0]\n",
      " [ 2  1  1  0  0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏬⏩️➡️⬅️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.007  -0.0207 -0.0124 -0.0034 -0.0043]\n",
      " [-0.0216 -0.0107 -0.015  -0.0089 -0.0034]\n",
      " [-0.0078 -0.029   0.0037 -0.008  -0.0021]\n",
      " [-0.0253  0.0129  0.0439  0.002  -0.0036]\n",
      " [-0.0193 -0.0081  0.0072 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005799773258618852\n",
      "episode 59/600\n",
      "p1 0.648 p0 0.088\n",
      "trajectorySteps 20\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 0 0 0 0]\n",
      " [5 0 2 0 0]\n",
      " [3 1 3 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏬⏩️➡️⬅️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.007  -0.0206 -0.0124 -0.0034 -0.0043]\n",
      " [-0.0216 -0.0107 -0.015  -0.0088 -0.0033]\n",
      " [-0.0078 -0.029   0.0037 -0.0079 -0.0021]\n",
      " [-0.0252  0.013   0.0438  0.002  -0.0036]\n",
      " [-0.0202 -0.0081  0.0077 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005805359355558168\n",
      "episode 60/600\n",
      "p1 0.6488 p0 0.08779999999999999\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [2 0 2 0 0]\n",
      " [1 2 5 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏬⏩️➡️⬅️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.007  -0.0206 -0.0124 -0.0034 -0.0043]\n",
      " [-0.0215 -0.0107 -0.0149 -0.0088 -0.0033]\n",
      " [-0.0078 -0.0289  0.0037 -0.0079 -0.0021]\n",
      " [-0.0252  0.013   0.0452  0.002  -0.0036]\n",
      " [-0.0211 -0.0089  0.0083 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005772288586039952\n",
      "episode 61/600\n",
      "p1 0.6496 p0 0.08759999999999998\n",
      "trajectorySteps 23\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [7 2 0 0 0]\n",
      " [4 2 2 0 0]\n",
      " [5 1 0 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏬⏩️➡️⬅️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.007  -0.0206 -0.0123 -0.0034 -0.0043]\n",
      " [-0.0215 -0.0107 -0.0149 -0.0088 -0.0033]\n",
      " [-0.0079 -0.0289  0.0038 -0.0079 -0.0021]\n",
      " [-0.026   0.0129  0.0466  0.002  -0.0036]\n",
      " [-0.0221 -0.0089  0.0083 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005789249576382816\n",
      "episode 62/600\n",
      "p1 0.6504000000000001 p0 0.08739999999999999\n",
      "trajectorySteps 29\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [9 2 1 0 0]\n",
      " [7 0 2 0 0]\n",
      " [8 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏬⏩️➡️⬅️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.007  -0.0205 -0.0123 -0.0034 -0.0043]\n",
      " [-0.0214 -0.0107 -0.0149 -0.0088 -0.0033]\n",
      " [-0.0081 -0.0297  0.0045 -0.0079 -0.0021]\n",
      " [-0.0261  0.0129  0.0479  0.002  -0.0036]\n",
      " [-0.0222 -0.0089  0.0083 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005736634395363161\n",
      "episode 63/600\n",
      "p1 0.6512 p0 0.08719999999999999\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [6 2 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬆️⏬⏩️➡️⬅️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.007  -0.0205 -0.0123 -0.0033 -0.0043]\n",
      " [-0.0214 -0.0106 -0.0148 -0.0088 -0.0033]\n",
      " [-0.0082 -0.0305  0.0045 -0.0079 -0.0021]\n",
      " [-0.0261  0.0137  0.0493  0.0021 -0.0036]\n",
      " [-0.0221 -0.0089  0.0083 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005670845201632753\n",
      "episode 64/600\n",
      "p1 0.652 p0 0.087\n",
      "trajectorySteps 33\n",
      "[[9 3 3 0 0]\n",
      " [2 0 2 5 2]\n",
      " [1 0 0 2 0]\n",
      " [0 0 2 2 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️➡️⬅️\n",
      "⬆️⏬⏩️➡️⬆️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.0072 -0.0204 -0.0132 -0.0033 -0.0042]\n",
      " [-0.0214 -0.0106 -0.0148 -0.0096 -0.0033]\n",
      " [-0.0082 -0.0304  0.0045 -0.0087 -0.0021]\n",
      " [-0.026   0.0137  0.0506  0.0019 -0.0036]\n",
      " [-0.0221 -0.0088  0.0084 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005722786753505\n",
      "episode 65/600\n",
      "p1 0.6528 p0 0.08679999999999999\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 1 0 0 0]\n",
      " [3 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️➡️⬅️\n",
      "⬆️⏬⏩️➡️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.0072 -0.0204 -0.0132 -0.0033 -0.0042]\n",
      " [-0.0213 -0.0106 -0.0148 -0.0096 -0.0033]\n",
      " [-0.0083 -0.0303  0.0045 -0.0087 -0.0021]\n",
      " [-0.0269  0.0145  0.052   0.0019 -0.0036]\n",
      " [-0.022  -0.0088  0.0084 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005656003361608779\n",
      "episode 66/600\n",
      "p1 0.6536 p0 0.08659999999999998\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 2 2 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️➡️⬅️\n",
      "⬆️⏬⏩️➡️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️➡️🔄\n",
      "[[-0.0072 -0.0203 -0.0131 -0.0033 -0.0042]\n",
      " [-0.0213 -0.0106 -0.0147 -0.0096 -0.0033]\n",
      " [-0.0083 -0.032   0.0052 -0.0087 -0.0021]\n",
      " [-0.0268  0.0145  0.0528  0.0019 -0.0036]\n",
      " [-0.022  -0.0088  0.0084 -0.0027 -0.0014]]\n",
      "mean_state_value -0.005644676072106134\n",
      "episode 67/600\n",
      "p1 0.6544000000000001 p0 0.08639999999999999\n",
      "trajectorySteps 249\n",
      "[[ 1  2  6 41 51]\n",
      " [ 3  3  3  5 11]\n",
      " [ 3  2  0  2 13]\n",
      " [ 0  0  2  1 14]\n",
      " [ 0  0  0 12 74]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.0072 -0.0203 -0.0148 -0.0036 -0.0049]\n",
      " [-0.0238 -0.0105 -0.0155 -0.0095 -0.0034]\n",
      " [-0.0084 -0.0319  0.0052 -0.0087 -0.0021]\n",
      " [-0.0267  0.0146  0.0527  0.0025 -0.0035]\n",
      " [-0.0219 -0.0088  0.0084 -0.0036 -0.0023]]\n",
      "mean_state_value -0.005928567822148023\n",
      "episode 68/600\n",
      "p1 0.6552 p0 0.08619999999999998\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 2 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.0072 -0.0202 -0.0148 -0.0036 -0.0049]\n",
      " [-0.0237 -0.0105 -0.0155 -0.0095 -0.0034]\n",
      " [-0.0084 -0.0327  0.0059 -0.0087 -0.0021]\n",
      " [-0.0267  0.0146  0.0541  0.0025 -0.0035]\n",
      " [-0.0219 -0.0088  0.0084 -0.0036 -0.0023]]\n",
      "mean_state_value -0.00585219453887419\n",
      "episode 69/600\n",
      "p1 0.656 p0 0.086\n",
      "trajectorySteps 44\n",
      "[[14  3  1  1  0]\n",
      " [ 3  7  2  1  0]\n",
      " [ 2  6  1  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬆️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.0075 -0.0211 -0.0156 -0.0036 -0.0049]\n",
      " [-0.0246 -0.0123 -0.0163 -0.0095 -0.0034]\n",
      " [-0.0084 -0.0369  0.006  -0.0086 -0.0021]\n",
      " [-0.0266  0.0152  0.0541  0.0025 -0.0035]\n",
      " [-0.0218 -0.0087  0.0084 -0.0036 -0.0022]]\n",
      "mean_state_value -0.006205954931116952\n",
      "episode 70/600\n",
      "p1 0.6568 p0 0.08579999999999999\n",
      "trajectorySteps 61\n",
      "[[ 0  0  2 12  1]\n",
      " [ 0  0  2 12  4]\n",
      " [ 7  2  1  0  6]\n",
      " [ 2  0  2  1  5]\n",
      " [ 0  0  1  1  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.0075 -0.0211 -0.0156 -0.0038 -0.0049]\n",
      " [-0.0245 -0.0122 -0.0163 -0.0103 -0.0034]\n",
      " [-0.0084 -0.0376  0.0051 -0.0086 -0.0022]\n",
      " [-0.0266  0.0153  0.0554  0.0025 -0.0044]\n",
      " [-0.0218 -0.0087  0.0092 -0.0036 -0.0022]]\n",
      "mean_state_value -0.0062466769509344254\n",
      "episode 71/600\n",
      "p1 0.6576 p0 0.08559999999999998\n",
      "trajectorySteps 86\n",
      "[[15  1  2  3  2]\n",
      " [ 2  0  0  0  5]\n",
      " [ 6  1  0  5 35]\n",
      " [ 1  1  2  2  3]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.0075 -0.021  -0.0155 -0.0037 -0.0049]\n",
      " [-0.0246 -0.0122 -0.0163 -0.0103 -0.0034]\n",
      " [-0.0084 -0.0384  0.0051 -0.0095 -0.0024]\n",
      " [-0.0265  0.0153  0.0554  0.0023 -0.0044]\n",
      " [-0.0218 -0.0087  0.0092 -0.0036 -0.0022]]\n",
      "mean_state_value -0.0063240928161683\n",
      "episode 72/600\n",
      "p1 0.6584000000000001 p0 0.08539999999999999\n",
      "trajectorySteps 39\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [19  2  1  0  0]\n",
      " [ 5  0  2  0  0]\n",
      " [10  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.0075 -0.021  -0.0155 -0.0037 -0.0049]\n",
      " [-0.0245 -0.0122 -0.0162 -0.0103 -0.0034]\n",
      " [-0.0087 -0.0391  0.0059 -0.0094 -0.0024]\n",
      " [-0.0265  0.0154  0.0567  0.0023 -0.0044]\n",
      " [-0.0218 -0.0087  0.0092 -0.0036 -0.0022]]\n",
      "mean_state_value -0.006268546499844861\n",
      "episode 73/600\n",
      "p1 0.6592 p0 0.08519999999999998\n",
      "trajectorySteps 78\n",
      "[[40  3  2  1  1]\n",
      " [ 9  1  0  0  2]\n",
      " [ 1  0  0  0  7]\n",
      " [ 0  0  2  1  7]\n",
      " [ 0  0  0  0  1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.008  -0.021  -0.0155 -0.0037 -0.0048]\n",
      " [-0.0254 -0.0122 -0.0162 -0.0103 -0.0034]\n",
      " [-0.0087 -0.0391  0.0059 -0.0094 -0.0024]\n",
      " [-0.0264  0.0154  0.0582  0.003  -0.0052]\n",
      " [-0.0218 -0.0087  0.0092 -0.0036 -0.0022]]\n",
      "mean_state_value -0.006248057908626271\n",
      "episode 74/600\n",
      "p1 0.66 p0 0.08499999999999999\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [3 1 2 0 0]\n",
      " [0 1 1 0 0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.008  -0.0209 -0.0154 -0.0037 -0.0048]\n",
      " [-0.0254 -0.0121 -0.0161 -0.0103 -0.0034]\n",
      " [-0.0088 -0.039   0.0059 -0.0094 -0.0024]\n",
      " [-0.0273  0.0146  0.0595  0.003  -0.0052]\n",
      " [-0.0217 -0.0086  0.01   -0.0036 -0.0022]]\n",
      "mean_state_value -0.006210649142605919\n",
      "episode 75/600\n",
      "p1 0.6608 p0 0.08479999999999999\n",
      "trajectorySteps 46\n",
      "[[ 2  0  0  0  0]\n",
      " [ 6  2  0  0  0]\n",
      " [19  3  0  0  0]\n",
      " [ 5  1  2  0  0]\n",
      " [ 6  0  0  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.008  -0.0209 -0.0154 -0.0037 -0.0048]\n",
      " [-0.0261 -0.0121 -0.0161 -0.0102 -0.0034]\n",
      " [-0.0088 -0.0405  0.006  -0.0094 -0.0024]\n",
      " [-0.0273  0.0154  0.061   0.003  -0.0052]\n",
      " [-0.0218 -0.0086  0.01   -0.0036 -0.0022]]\n",
      "mean_state_value -0.006205772415831444\n",
      "episode 76/600\n",
      "p1 0.6616 p0 0.08459999999999998\n",
      "trajectorySteps 18\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [9 2 1 0 0]\n",
      " [3 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.008  -0.0208 -0.0154 -0.0037 -0.0048]\n",
      " [-0.0261 -0.0121 -0.0161 -0.0102 -0.0034]\n",
      " [-0.0089 -0.0413  0.0065 -0.0093 -0.0024]\n",
      " [-0.0282  0.0154  0.061   0.0031 -0.0052]\n",
      " [-0.0217 -0.0086  0.01   -0.0035 -0.0022]]\n",
      "mean_state_value -0.006233311578123908\n",
      "episode 77/600\n",
      "p1 0.6624000000000001 p0 0.08439999999999999\n",
      "trajectorySteps 36\n",
      "[[1 1 1 1 1]\n",
      " [1 0 1 2 3]\n",
      " [2 0 1 8 9]\n",
      " [0 0 2 1 1]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.008  -0.0208 -0.0153 -0.0037 -0.0048]\n",
      " [-0.026  -0.0121 -0.016  -0.0102 -0.0034]\n",
      " [-0.0089 -0.0412  0.0057 -0.011  -0.0024]\n",
      " [-0.0281  0.0155  0.0609  0.0037 -0.0052]\n",
      " [-0.0217 -0.0086  0.01   -0.0035 -0.0022]]\n",
      "mean_state_value -0.006285186646836677\n",
      "episode 78/600\n",
      "p1 0.6632 p0 0.08419999999999998\n",
      "trajectorySteps 118\n",
      "[[25  1  1  0  0]\n",
      " [18  0  1  1  1]\n",
      " [ 8  1  0  2 21]\n",
      " [ 4  0  2  1 22]\n",
      " [ 7  0  0  0  2]]\n",
      "🔄⬅️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬆️\n",
      "[[-0.0085 -0.0208 -0.0161 -0.0037 -0.0048]\n",
      " [-0.0261 -0.012  -0.016  -0.0102 -0.0034]\n",
      " [-0.0089 -0.0411  0.0058 -0.011  -0.0025]\n",
      " [-0.0281  0.0155  0.0624  0.0044 -0.0061]\n",
      " [-0.0219 -0.0085  0.0101 -0.0035 -0.0022]]\n",
      "mean_state_value -0.0062925506875273676\n",
      "episode 79/600\n",
      "p1 0.664 p0 0.08399999999999999\n",
      "trajectorySteps 87\n",
      "[[18  1  0  0  0]\n",
      " [ 7  0  0  0  0]\n",
      " [46  2  0  0  0]\n",
      " [ 5  0  2  0  0]\n",
      " [ 4  1  1  0  0]]\n",
      "⬇️⬅️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0086 -0.0207 -0.0161 -0.0037 -0.0048]\n",
      " [-0.0262 -0.012  -0.016  -0.0101 -0.0034]\n",
      " [-0.009  -0.041   0.0058 -0.011  -0.0025]\n",
      " [-0.0281  0.0156  0.0639  0.0044 -0.0061]\n",
      " [-0.0228 -0.0085  0.0108 -0.0035 -0.0022]]\n",
      "mean_state_value -0.006229143873701319\n",
      "episode 80/600\n",
      "p1 0.6648000000000001 p0 0.08379999999999999\n",
      "trajectorySteps 58\n",
      "[[ 1  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [36 11  3  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0086 -0.0207 -0.016  -0.0037 -0.0048]\n",
      " [-0.0262 -0.012  -0.0159 -0.0101 -0.0034]\n",
      " [-0.0091 -0.0417  0.0047 -0.0109 -0.0025]\n",
      " [-0.0281  0.0156  0.0639  0.0044 -0.006 ]\n",
      " [-0.0227 -0.0085  0.0108 -0.0035 -0.0022]]\n",
      "mean_state_value -0.006288074099193936\n",
      "episode 81/600\n",
      "p1 0.6656 p0 0.08359999999999998\n",
      "trajectorySteps 89\n",
      "[[ 1  0  0  0  0]\n",
      " [ 5  1  0  0  0]\n",
      " [69  5  0  0  0]\n",
      " [ 4  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0086 -0.0206 -0.016  -0.0037 -0.0048]\n",
      " [-0.0261 -0.0119 -0.0159 -0.0101 -0.0034]\n",
      " [-0.0095 -0.0441  0.0047 -0.0109 -0.0025]\n",
      " [-0.028   0.0162  0.0639  0.0044 -0.006 ]\n",
      " [-0.0227 -0.0085  0.0109 -0.0035 -0.0022]]\n",
      "mean_state_value -0.006349857118938237\n",
      "episode 82/600\n",
      "p1 0.6664000000000001 p0 0.08339999999999999\n",
      "trajectorySteps 49\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [13  2  1  0  0]\n",
      " [22  1  2  0  0]\n",
      " [ 8  0  0  0  0]]\n",
      "⬇️⬅️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0086 -0.0206 -0.016  -0.0037 -0.0047]\n",
      " [-0.0261 -0.0119 -0.0158 -0.0101 -0.0034]\n",
      " [-0.0095 -0.0448  0.0055 -0.0109 -0.0025]\n",
      " [-0.029   0.0163  0.0654  0.0045 -0.006 ]\n",
      " [-0.0229 -0.0084  0.0109 -0.0035 -0.0022]]\n",
      "mean_state_value -0.006321479152666553\n",
      "episode 83/600\n",
      "p1 0.6672 p0 0.08319999999999998\n",
      "trajectorySteps 22\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [14  0  0  0  0]\n",
      " [ 4  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0085 -0.0205 -0.0159 -0.0037 -0.0047]\n",
      " [-0.026  -0.0119 -0.0158 -0.01   -0.0033]\n",
      " [-0.0097 -0.0447  0.0055 -0.0109 -0.0025]\n",
      " [-0.0298  0.0169  0.0654  0.0045 -0.006 ]\n",
      " [-0.0228 -0.0084  0.0109 -0.0035 -0.0022]]\n",
      "mean_state_value -0.006309357561643568\n",
      "episode 84/600\n",
      "p1 0.668 p0 0.08299999999999999\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️⬅️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0085 -0.0205 -0.0159 -0.0036 -0.0047]\n",
      " [-0.0259 -0.0119 -0.0158 -0.01   -0.0033]\n",
      " [-0.0097 -0.0446  0.0056 -0.0108 -0.0025]\n",
      " [-0.0305  0.0177  0.0667  0.0045 -0.006 ]\n",
      " [-0.0228 -0.0084  0.0109 -0.0035 -0.0022]]\n",
      "mean_state_value -0.006230980754177373\n",
      "episode 85/600\n",
      "p1 0.6688000000000001 p0 0.08279999999999998\n",
      "trajectorySteps 38\n",
      "[[ 2  0  0  0  0]\n",
      " [12  0  0  0  0]\n",
      " [17  2  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0085 -0.0204 -0.0159 -0.0036 -0.0047]\n",
      " [-0.0259 -0.0118 -0.0157 -0.01   -0.0033]\n",
      " [-0.0098 -0.0445  0.0056 -0.0108 -0.0025]\n",
      " [-0.0312  0.0184  0.0675  0.0045 -0.006 ]\n",
      " [-0.0227 -0.0084  0.0109 -0.0035 -0.0022]]\n",
      "mean_state_value -0.006181417230592787\n",
      "episode 86/600\n",
      "p1 0.6696 p0 0.08259999999999998\n",
      "trajectorySteps 19\n",
      "[[2 0 0 0 0]\n",
      " [5 0 0 0 0]\n",
      " [8 1 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️⬅️➡️➡️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0086 -0.0204 -0.0158 -0.0036 -0.0047]\n",
      " [-0.0259 -0.0118 -0.0157 -0.01   -0.0033]\n",
      " [-0.01   -0.0452  0.0064 -0.0108 -0.0025]\n",
      " [-0.0311  0.0185  0.069   0.0045 -0.006 ]\n",
      " [-0.0227 -0.0084  0.011  -0.0035 -0.0022]]\n",
      "mean_state_value -0.006106686717169041\n",
      "episode 87/600\n",
      "p1 0.6704000000000001 p0 0.08239999999999999\n",
      "trajectorySteps 67\n",
      "[[19  4  1  1  1]\n",
      " [21  1  0  0  1]\n",
      " [ 6  1  0  1  5]\n",
      " [ 0  0  2  1  0]\n",
      " [ 0  0  1  1  0]]\n",
      "🔄⬅️➡️➡️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0086 -0.0204 -0.0158 -0.0036 -0.0047]\n",
      " [-0.0267 -0.0118 -0.0157 -0.0099 -0.0033]\n",
      " [-0.0099 -0.0451  0.0064 -0.0116 -0.0026]\n",
      " [-0.0311  0.0185  0.0706  0.0045 -0.0059]\n",
      " [-0.0226 -0.0083  0.0117 -0.0034 -0.0022]]\n",
      "mean_state_value -0.006060175632092661\n",
      "episode 88/600\n",
      "p1 0.6712 p0 0.08219999999999998\n",
      "trajectorySteps 73\n",
      "[[36  5  3  4  2]\n",
      " [ 9  0  1  1  1]\n",
      " [ 3  0  0  0  2]\n",
      " [ 0  0  2  1  2]\n",
      " [ 0  0  0  0  1]]\n",
      "➡️⬅️➡️➡️⬇️\n",
      "⬆️⏪⏩️➡️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0089 -0.0204 -0.0157 -0.0036 -0.0047]\n",
      " [-0.0266 -0.0117 -0.0156 -0.0107 -0.0033]\n",
      " [-0.0099 -0.045   0.0065 -0.0115 -0.0026]\n",
      " [-0.031   0.0186  0.0721  0.0053 -0.0067]\n",
      " [-0.0226 -0.0083  0.0118 -0.0034 -0.0022]]\n",
      "mean_state_value -0.006021888576695212\n",
      "episode 89/600\n",
      "p1 0.672 p0 0.08199999999999999\n",
      "trajectorySteps 120\n",
      "[[39 32  0  0  3]\n",
      " [ 8  5  2  1  6]\n",
      " [ 5  4  0  0  1]\n",
      " [ 7  0  2  2  1]\n",
      " [ 0  0  0  1  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0095 -0.0247 -0.0157 -0.0036 -0.0047]\n",
      " [-0.0266 -0.0125 -0.0164 -0.0107 -0.0033]\n",
      " [-0.0099 -0.0449  0.0065 -0.0115 -0.0026]\n",
      " [-0.0309  0.0186  0.0736  0.0052 -0.0067]\n",
      " [-0.0225 -0.0083  0.0118 -0.0042 -0.0022]]\n",
      "mean_state_value -0.006233579902888987\n",
      "episode 90/600\n",
      "p1 0.6728000000000001 p0 0.08179999999999998\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0095 -0.0246 -0.0157 -0.0036 -0.0047]\n",
      " [-0.0265 -0.0125 -0.0164 -0.0107 -0.0033]\n",
      " [-0.0099 -0.0448  0.0065 -0.0115 -0.0026]\n",
      " [-0.0317  0.0192  0.0736  0.0053 -0.0067]\n",
      " [-0.0225 -0.0083  0.0118 -0.0042 -0.0022]]\n",
      "mean_state_value -0.006211834346020643\n",
      "episode 91/600\n",
      "p1 0.6736 p0 0.08159999999999998\n",
      "trajectorySteps 33\n",
      "[[9 1 1 0 2]\n",
      " [1 0 1 1 4]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 1 3]\n",
      " [0 0 0 0 4]]\n",
      "⬇️⬅️➡️➡️⬇️\n",
      "⬇️⏪⏩️➡️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0098 -0.0246 -0.0164 -0.0036 -0.0046]\n",
      " [-0.0264 -0.0125 -0.0163 -0.0106 -0.0033]\n",
      " [-0.0099 -0.0447  0.0065 -0.0115 -0.0026]\n",
      " [-0.0316  0.0193  0.0745  0.0059 -0.0075]\n",
      " [-0.0224 -0.0083  0.0118 -0.0042 -0.0023]]\n",
      "mean_state_value -0.006206348433127711\n",
      "episode 92/600\n",
      "p1 0.6744000000000001 p0 0.08139999999999999\n",
      "trajectorySteps 128\n",
      "[[ 6  0  0  0  0]\n",
      " [56  9  0  0  0]\n",
      " [45  5  2  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬆️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0099 -0.0246 -0.0164 -0.0036 -0.0046]\n",
      " [-0.0334 -0.0134 -0.0163 -0.0106 -0.0033]\n",
      " [-0.0103 -0.0463  0.0073 -0.0114 -0.0026]\n",
      " [-0.0315  0.0193  0.0758  0.0059 -0.0075]\n",
      " [-0.0224 -0.0082  0.0118 -0.0042 -0.0023]]\n",
      "mean_state_value -0.006501061044938521\n",
      "episode 93/600\n",
      "p1 0.6752 p0 0.08119999999999998\n",
      "trajectorySteps 73\n",
      "[[ 8  3  1  1  9]\n",
      " [ 3  1  0  1 10]\n",
      " [ 1  0  0  1  4]\n",
      " [ 0  0  2  0  3]\n",
      " [ 0  0  2  2 21]]\n",
      "🔄⬅️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.01   -0.0246 -0.0163 -0.0036 -0.0049]\n",
      " [-0.0341 -0.0134 -0.0162 -0.0106 -0.0034]\n",
      " [-0.0103 -0.0462  0.0074 -0.0114 -0.0026]\n",
      " [-0.0315  0.0194  0.0759  0.006  -0.0075]\n",
      " [-0.0223 -0.0082  0.0125 -0.0042 -0.0024]]\n",
      "mean_state_value -0.006501133901172958\n",
      "episode 94/600\n",
      "p1 0.676 p0 0.08099999999999999\n",
      "trajectorySteps 146\n",
      "[[99 14  0  0  1]\n",
      " [ 6  5  1  1  4]\n",
      " [ 5  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  2  1  3]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0117 -0.0263 -0.0163 -0.0036 -0.0048]\n",
      " [-0.0356 -0.0151 -0.0162 -0.0106 -0.0034]\n",
      " [-0.0105 -0.0461  0.0074 -0.0114 -0.0026]\n",
      " [-0.0314  0.0194  0.0767  0.006  -0.0075]\n",
      " [-0.0223 -0.0082  0.0131 -0.0042 -0.0024]]\n",
      "mean_state_value -0.006695715983411122\n",
      "episode 95/600\n",
      "p1 0.6768000000000001 p0 0.08079999999999998\n",
      "trajectorySteps 49\n",
      "[[ 6  1  2  4  1]\n",
      " [ 5  0  0  0  2]\n",
      " [ 7  1  0  0  2]\n",
      " [ 0  0  2  1  4]\n",
      " [ 0  0  0  1 10]]\n",
      "➡️➡️➡️➡️⬅️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0118 -0.0262 -0.0163 -0.0036 -0.0049]\n",
      " [-0.0356 -0.0151 -0.0162 -0.0106 -0.0035]\n",
      " [-0.0105 -0.0459  0.0074 -0.0113 -0.0026]\n",
      " [-0.0313  0.0195  0.0776  0.0067 -0.0074]\n",
      " [-0.0222 -0.0082  0.0131 -0.005  -0.0024]]\n",
      "mean_state_value -0.006646151613653941\n",
      "episode 96/600\n",
      "p1 0.6776 p0 0.08059999999999998\n",
      "trajectorySteps 101\n",
      "[[ 2  6  4 21 24]\n",
      " [ 1  1  0  1  3]\n",
      " [25  2  0  0  1]\n",
      " [ 5  0  2  2  1]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0118 -0.027  -0.0163 -0.0037 -0.0053]\n",
      " [-0.0355 -0.0151 -0.0161 -0.0105 -0.0036]\n",
      " [-0.0108 -0.0458  0.0075 -0.0113 -0.0026]\n",
      " [-0.0313  0.0195  0.0791  0.0066 -0.0082]\n",
      " [-0.0222 -0.0082  0.0131 -0.005  -0.0023]]\n",
      "mean_state_value -0.006669209284949152\n",
      "episode 97/600\n",
      "p1 0.6784000000000001 p0 0.08039999999999999\n",
      "trajectorySteps 149\n",
      "[[ 1  1  4 29  4]\n",
      " [ 1  0  0  2  8]\n",
      " [ 2  0  0  2 56]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  2  1 32]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0118 -0.027  -0.0163 -0.0039 -0.0054]\n",
      " [-0.0354 -0.015  -0.0161 -0.0105 -0.0037]\n",
      " [-0.0108 -0.0457  0.0075 -0.0113 -0.0031]\n",
      " [-0.0312  0.0196  0.0792  0.0067 -0.0082]\n",
      " [-0.0221 -0.0081  0.0138 -0.005  -0.0031]]\n",
      "mean_state_value -0.006677821815578458\n",
      "episode 98/600\n",
      "p1 0.6792 p0 0.08019999999999998\n",
      "trajectorySteps 25\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [13  1  0  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [ 3  1  2  0  0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0117 -0.0269 -0.0162 -0.0039 -0.0054]\n",
      " [-0.0353 -0.015  -0.016  -0.0105 -0.0037]\n",
      " [-0.0107 -0.0456  0.0075 -0.0113 -0.0031]\n",
      " [-0.0312  0.0197  0.0807  0.0067 -0.0082]\n",
      " [-0.0228 -0.0081  0.0146 -0.005  -0.0031]]\n",
      "mean_state_value -0.00658488918791098\n",
      "episode 99/600\n",
      "p1 0.68 p0 0.07999999999999999\n",
      "trajectorySteps 109\n",
      "[[ 1  3  6 16  4]\n",
      " [ 3  2  0  1  0]\n",
      " [38  3  0  1  1]\n",
      " [ 7  0  2  1  5]\n",
      " [ 7  0  0  0  8]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0117 -0.0276 -0.0163 -0.004  -0.0054]\n",
      " [-0.036  -0.015  -0.016  -0.0105 -0.0037]\n",
      " [-0.0112 -0.0455  0.0076 -0.0112 -0.0031]\n",
      " [-0.0312  0.0197  0.0822  0.0075 -0.009 ]\n",
      " [-0.0229 -0.0081  0.0146 -0.0049 -0.0033]]\n",
      "mean_state_value -0.0066046737828232205\n",
      "episode 100/600\n",
      "p1 0.6808000000000001 p0 0.07979999999999998\n",
      "trajectorySteps 76\n",
      "[[ 2  1  3 15  0]\n",
      " [ 1  0  1  5  2]\n",
      " [ 1  0  0  4 21]\n",
      " [ 0  0  2  2 14]\n",
      " [ 0  0  1  1  0]]\n",
      "➡️⬅️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0117 -0.0276 -0.017  -0.0041 -0.0054]\n",
      " [-0.036  -0.0149 -0.016  -0.0105 -0.0037]\n",
      " [-0.0111 -0.0454  0.0076 -0.0112 -0.0035]\n",
      " [-0.0311  0.0198  0.0836  0.0075 -0.0106]\n",
      " [-0.0229 -0.0081  0.0154 -0.0049 -0.0033]]\n",
      "mean_state_value -0.006605444036877052\n",
      "episode 101/600\n",
      "p1 0.6816 p0 0.07959999999999998\n",
      "trajectorySteps 84\n",
      "[[14 13  1  2  3]\n",
      " [ 5  1  0  2  3]\n",
      " [21  1  0  6  5]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄⬅️➡️➡️🔄\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.012  -0.0284 -0.017  -0.0042 -0.0054]\n",
      " [-0.0359 -0.0149 -0.0159 -0.0104 -0.0038]\n",
      " [-0.0115 -0.0453  0.0076 -0.0112 -0.0035]\n",
      " [-0.031   0.0198  0.0851  0.0075 -0.0106]\n",
      " [-0.0228 -0.0081  0.0162 -0.0049 -0.0033]]\n",
      "mean_state_value -0.006550905396181202\n",
      "episode 102/600\n",
      "p1 0.6824000000000001 p0 0.07939999999999998\n",
      "trajectorySteps 31\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [22  3  1  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️➡️🔄\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0119 -0.0283 -0.017  -0.0042 -0.0054]\n",
      " [-0.0358 -0.0149 -0.0159 -0.0104 -0.0038]\n",
      " [-0.0117 -0.0459  0.0084 -0.0112 -0.0035]\n",
      " [-0.0309  0.0199  0.0867  0.0075 -0.0106]\n",
      " [-0.0228 -0.008   0.0162 -0.0049 -0.0033]]\n",
      "mean_state_value -0.006461006433970336\n",
      "episode 103/600\n",
      "p1 0.6832 p0 0.07919999999999998\n",
      "trajectorySteps 119\n",
      "[[25  3  0  0  0]\n",
      " [ 7  1  0  0  0]\n",
      " [63  6  0  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "⬇️⬅️➡️➡️🔄\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0124 -0.029  -0.0169 -0.0042 -0.0054]\n",
      " [-0.0357 -0.0148 -0.0158 -0.0104 -0.0037]\n",
      " [-0.0121 -0.0466  0.0085 -0.0111 -0.0035]\n",
      " [-0.031   0.0207  0.0882  0.0076 -0.0105]\n",
      " [-0.0228 -0.008   0.0162 -0.0049 -0.0033]]\n",
      "mean_state_value -0.006441204085400194\n",
      "episode 104/600\n",
      "p1 0.684 p0 0.07899999999999999\n",
      "trajectorySteps 237\n",
      "[[13  3  0  0  0]\n",
      " [97  9  0  0  0]\n",
      " [92 13  0  0  0]\n",
      " [ 7  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️➡️🔄\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0126 -0.0297 -0.0169 -0.0042 -0.0054]\n",
      " [-0.0413 -0.0157 -0.0158 -0.0104 -0.0037]\n",
      " [-0.0129 -0.0481  0.0085 -0.0111 -0.0035]\n",
      " [-0.031   0.0215  0.0898  0.0076 -0.0105]\n",
      " [-0.0228 -0.008   0.0163 -0.0048 -0.0033]]\n",
      "mean_state_value -0.006714146905595525\n",
      "episode 105/600\n",
      "p1 0.6848000000000001 p0 0.07879999999999998\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 1 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️➡️🔄\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0126 -0.0296 -0.0169 -0.0041 -0.0053]\n",
      " [-0.0412 -0.0157 -0.0158 -0.0103 -0.0037]\n",
      " [-0.013  -0.0479  0.0085 -0.0111 -0.0035]\n",
      " [-0.0317  0.0223  0.0914  0.0076 -0.0105]\n",
      " [-0.0227 -0.008   0.0163 -0.0048 -0.0033]]\n",
      "mean_state_value -0.006623365694154947\n",
      "episode 106/600\n",
      "p1 0.6856 p0 0.07859999999999998\n",
      "trajectorySteps 121\n",
      "[[47  4  0  0  0]\n",
      " [14  3  0  0  0]\n",
      " [39 10  1  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️➡️🔄\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0131 -0.0296 -0.0168 -0.0041 -0.0053]\n",
      " [-0.0435 -0.0156 -0.0157 -0.0103 -0.0037]\n",
      " [-0.0135 -0.0487  0.0094 -0.0111 -0.0034]\n",
      " [-0.0316  0.0224  0.0929  0.0076 -0.0105]\n",
      " [-0.0227 -0.008   0.0163 -0.0048 -0.0033]]\n",
      "mean_state_value -0.006670218702913363\n",
      "episode 107/600\n",
      "p1 0.6864000000000001 p0 0.07839999999999998\n",
      "trajectorySteps 45\n",
      "[[ 2  1  1  2  4]\n",
      " [ 1  0  0  1  1]\n",
      " [10  3  2  3  1]\n",
      " [ 9  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️⬅️➡️➡️🔄\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0132 -0.0295 -0.0168 -0.0041 -0.0053]\n",
      " [-0.0435 -0.0156 -0.0157 -0.0103 -0.0037]\n",
      " [-0.0134 -0.0486  0.0094 -0.0118 -0.0034]\n",
      " [-0.0324  0.0224  0.0945  0.0076 -0.0104]\n",
      " [-0.0226 -0.0079  0.0163 -0.0048 -0.0033]]\n",
      "mean_state_value -0.0066431991765405\n",
      "episode 108/600\n",
      "p1 0.6872 p0 0.07819999999999998\n",
      "trajectorySteps 44\n",
      "[[ 5  2  0  0  0]\n",
      " [ 6  0  0  0  0]\n",
      " [14  1  1  0  0]\n",
      " [13  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️➡️🔄\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0132 -0.0295 -0.0167 -0.0041 -0.0053]\n",
      " [-0.0433 -0.0156 -0.0156 -0.0103 -0.0037]\n",
      " [-0.0135 -0.0492  0.0102 -0.0118 -0.0034]\n",
      " [-0.0325  0.0225  0.0959  0.0077 -0.0104]\n",
      " [-0.0226 -0.0079  0.0164 -0.0048 -0.0033]]\n",
      "mean_state_value -0.006561993119645701\n",
      "episode 109/600\n",
      "p1 0.6880000000000001 p0 0.07799999999999999\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 1 1 0 0]\n",
      " [2 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️➡️🔄\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0132 -0.0294 -0.0167 -0.0041 -0.0053]\n",
      " [-0.0432 -0.0155 -0.0156 -0.0102 -0.0037]\n",
      " [-0.0134 -0.0499  0.0109 -0.0118 -0.0034]\n",
      " [-0.0325  0.0226  0.096   0.0077 -0.0104]\n",
      " [-0.0225 -0.0079  0.0164 -0.0048 -0.0033]]\n",
      "mean_state_value -0.006528388950200026\n",
      "episode 110/600\n",
      "p1 0.6888000000000001 p0 0.07779999999999998\n",
      "trajectorySteps 67\n",
      "[[21 15  1  0  0]\n",
      " [ 5  1  1  0  0]\n",
      " [10  1  0  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️🔄➡️➡️🔄\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0295 -0.0174 -0.0041 -0.0053]\n",
      " [-0.0431 -0.0155 -0.0163 -0.0102 -0.0037]\n",
      " [-0.0136 -0.0497  0.0109 -0.0117 -0.0034]\n",
      " [-0.0333  0.0232  0.0961  0.0077 -0.0104]\n",
      " [-0.0225 -0.0079  0.0164 -0.0048 -0.0033]]\n",
      "mean_state_value -0.006590046848201207\n",
      "episode 111/600\n",
      "p1 0.6896 p0 0.07759999999999997\n",
      "trajectorySteps 31\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [10  0  2  0  0]\n",
      " [12  2  2  1  0]]\n",
      "➡️🔄➡️➡️🔄\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0294 -0.0174 -0.0041 -0.0053]\n",
      " [-0.043  -0.0155 -0.0163 -0.0102 -0.0037]\n",
      " [-0.0136 -0.0496  0.0109 -0.0117 -0.0034]\n",
      " [-0.0333  0.0232  0.0976  0.0077 -0.0103]\n",
      " [-0.0242 -0.0079  0.0172 -0.0047 -0.0032]]\n",
      "mean_state_value -0.006535587500061485\n",
      "episode 112/600\n",
      "p1 0.6904000000000001 p0 0.07739999999999998\n",
      "trajectorySteps 60\n",
      "[[ 7 16  0  0  0]\n",
      " [ 5  2  0  0  0]\n",
      " [12  5  0  0  0]\n",
      " [10  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️🔄\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0309 -0.0173 -0.0041 -0.0053]\n",
      " [-0.043  -0.0154 -0.0162 -0.0102 -0.0037]\n",
      " [-0.0136 -0.0495  0.011  -0.0117 -0.0034]\n",
      " [-0.0341  0.0239  0.0977  0.0078 -0.0103]\n",
      " [-0.0241 -0.0078  0.0173 -0.0047 -0.0032]]\n",
      "mean_state_value -0.0065732915658251335\n",
      "episode 113/600\n",
      "p1 0.6912 p0 0.07719999999999998\n",
      "trajectorySteps 57\n",
      "[[ 1  1  1  2 16]\n",
      " [ 1  0  0  1  2]\n",
      " [ 1  0  0  8  9]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  2  3  5]]\n",
      "➡️➡️➡️➡️🔄\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0308 -0.0173 -0.0041 -0.0052]\n",
      " [-0.0429 -0.0154 -0.0162 -0.0101 -0.0037]\n",
      " [-0.0136 -0.0494  0.011  -0.0116 -0.0034]\n",
      " [-0.034   0.0239  0.0979  0.0078 -0.0103]\n",
      " [-0.024  -0.0078  0.0179 -0.0048 -0.0034]]\n",
      "mean_state_value -0.0065175069407741705\n",
      "episode 114/600\n",
      "p1 0.6920000000000001 p0 0.07699999999999999\n",
      "trajectorySteps 184\n",
      "[[  5   5   4  14 100]\n",
      " [  3   0   1   7   7]\n",
      " [  2   0   0  12  17]\n",
      " [  0   0   2   0   2]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️➡️⬅️\n",
      "⬆️⏬⏩️🔄⬆️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0135 -0.0308 -0.0172 -0.0042 -0.0067]\n",
      " [-0.0428 -0.0153 -0.0162 -0.0109 -0.0037]\n",
      " [-0.0136 -0.0492  0.0111 -0.0116 -0.0035]\n",
      " [-0.0339  0.024   0.0995  0.0078 -0.0103]\n",
      " [-0.024  -0.0078  0.0187 -0.0048 -0.0034]]\n",
      "mean_state_value -0.006502437900997943\n",
      "episode 115/600\n",
      "p1 0.6928000000000001 p0 0.07679999999999998\n",
      "trajectorySteps 60\n",
      "[[ 2  1  2  5  5]\n",
      " [ 1  0  0  2  1]\n",
      " [18  1  0  3  4]\n",
      " [ 1  0  2  0  6]\n",
      " [ 0  0  1  2  3]]\n",
      "➡️➡️➡️➡️⬅️\n",
      "⬆️⏬⏩️🔄⬆️\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0308 -0.0173 -0.0042 -0.0067]\n",
      " [-0.0427 -0.0153 -0.0161 -0.0109 -0.0037]\n",
      " [-0.0138 -0.0492  0.0111 -0.0116 -0.0036]\n",
      " [-0.0338  0.0241  0.1011  0.0078 -0.0103]\n",
      " [-0.0239 -0.0078  0.0195 -0.0047 -0.0034]]\n",
      "mean_state_value -0.006390228433425393\n",
      "episode 116/600\n",
      "p1 0.6936 p0 0.07659999999999997\n",
      "trajectorySteps 410\n",
      "[[  3   1   9 130 132]\n",
      " [  2   1  10  80  23]\n",
      " [  5   2   0   6   3]\n",
      " [  0   1   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏬⏩️➡️⬅️\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0135 -0.0307 -0.0188 -0.005  -0.0081]\n",
      " [-0.0427 -0.0154 -0.0176 -0.0162 -0.0039]\n",
      " [-0.0138 -0.0498  0.0111 -0.0116 -0.0036]\n",
      " [-0.0338  0.0249  0.1024  0.0078 -0.0102]\n",
      " [-0.0239 -0.0078  0.0196 -0.0047 -0.0034]]\n",
      "mean_state_value -0.006742677839429705\n",
      "episode 117/600\n",
      "p1 0.6944000000000001 p0 0.07639999999999998\n",
      "trajectorySteps 17\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [11  0  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏬⏩️➡️⬅️\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0306 -0.0188 -0.005  -0.0081]\n",
      " [-0.0426 -0.0154 -0.0176 -0.0162 -0.0039]\n",
      " [-0.014  -0.0497  0.0112 -0.0115 -0.0036]\n",
      " [-0.0345  0.0258  0.104   0.0079 -0.0102]\n",
      " [-0.0238 -0.0077  0.0196 -0.0047 -0.0034]]\n",
      "mean_state_value -0.006649904754253023\n",
      "episode 118/600\n",
      "p1 0.6952 p0 0.07619999999999998\n",
      "trajectorySteps 44\n",
      "[[ 1  1  1  1  3]\n",
      " [ 1  0  2  7  8]\n",
      " [16  0  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "⬆️⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0305 -0.0187 -0.005  -0.0081]\n",
      " [-0.0425 -0.0154 -0.0183 -0.0176 -0.0039]\n",
      " [-0.0145 -0.0496  0.012  -0.0115 -0.0036]\n",
      " [-0.0344  0.0258  0.1055  0.0079 -0.0102]\n",
      " [-0.0237 -0.0077  0.0196 -0.0047 -0.0034]]\n",
      "mean_state_value -0.006632145882371871\n",
      "episode 119/600\n",
      "p1 0.6960000000000001 p0 0.07599999999999998\n",
      "trajectorySteps 24\n",
      "[[1 1 1 3 0]\n",
      " [1 0 2 3 3]\n",
      " [2 0 1 2 1]\n",
      " [1 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "⬆️⬅️⏬🔄⬅️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0304 -0.0187 -0.005  -0.0081]\n",
      " [-0.0424 -0.0153 -0.0197 -0.0183 -0.0039]\n",
      " [-0.0145 -0.0494  0.0129 -0.0115 -0.0036]\n",
      " [-0.0343  0.0259  0.1071  0.0079 -0.0102]\n",
      " [-0.0237 -0.0077  0.0196 -0.0047 -0.0034]]\n",
      "mean_state_value -0.006587376754663861\n",
      "episode 120/600\n",
      "p1 0.6968000000000001 p0 0.07579999999999998\n",
      "trajectorySteps 73\n",
      "[[ 1  1  2  5  3]\n",
      " [ 1  0  0  3 18]\n",
      " [ 2  0  2 29  4]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0133 -0.0304 -0.0186 -0.005  -0.008 ]\n",
      " [-0.0422 -0.0153 -0.0197 -0.0183 -0.004 ]\n",
      " [-0.0144 -0.0493  0.0137 -0.013  -0.0035]\n",
      " [-0.0342  0.0259  0.1087  0.0079 -0.0101]\n",
      " [-0.0236 -0.0077  0.0197 -0.0047 -0.0034]]\n",
      "mean_state_value -0.006514101654491471\n",
      "episode 121/600\n",
      "p1 0.6976 p0 0.07559999999999997\n",
      "trajectorySteps 68\n",
      "[[ 3  2  4 13  0]\n",
      " [ 2  0  3  7  3]\n",
      " [ 2  2  0  6  8]\n",
      " [ 0  0  2  1  7]\n",
      " [ 0  0  0  2  1]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏬⏩️➡️⬆️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0303 -0.0186 -0.0051 -0.008 ]\n",
      " [-0.0422 -0.0153 -0.0196 -0.0204 -0.004 ]\n",
      " [-0.0144 -0.0492  0.0138 -0.013  -0.0036]\n",
      " [-0.0341  0.026   0.1096  0.0086 -0.0101]\n",
      " [-0.0236 -0.0077  0.0197 -0.0055 -0.0034]]\n",
      "mean_state_value -0.006546398033044873\n",
      "episode 122/600\n",
      "p1 0.6984000000000001 p0 0.07539999999999998\n",
      "trajectorySteps 113\n",
      "[[ 1  2  2 11 33]\n",
      " [ 1  0  0  9 37]\n",
      " [ 2  1  1  5  6]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️🔄🔄⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0302 -0.0186 -0.0051 -0.0086]\n",
      " [-0.0421 -0.0152 -0.0196 -0.0204 -0.0044]\n",
      " [-0.0144 -0.0491  0.0146 -0.0137 -0.0036]\n",
      " [-0.034   0.026   0.1112  0.0086 -0.0101]\n",
      " [-0.0235 -0.0076  0.0197 -0.0055 -0.0034]]\n",
      "mean_state_value -0.006483564953296256\n",
      "episode 123/600\n",
      "p1 0.6992 p0 0.07519999999999998\n",
      "trajectorySteps 26\n",
      "[[4 2 9 2 0]\n",
      " [3 0 0 1 0]\n",
      " [1 0 0 1 0]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️🔄🔄⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0301 -0.0186 -0.0051 -0.0086]\n",
      " [-0.0421 -0.0152 -0.0195 -0.0204 -0.0044]\n",
      " [-0.0144 -0.0489  0.0147 -0.0144 -0.0036]\n",
      " [-0.034   0.0261  0.1129  0.0095 -0.01  ]\n",
      " [-0.0234 -0.0076  0.0198 -0.0054 -0.0034]]\n",
      "mean_state_value -0.006387219520889704\n",
      "episode 124/600\n",
      "p1 0.7000000000000001 p0 0.07499999999999998\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️🔄🔄⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0301 -0.0186 -0.0051 -0.0085]\n",
      " [-0.0419 -0.0151 -0.0195 -0.0204 -0.0044]\n",
      " [-0.0144 -0.0488  0.0147 -0.0144 -0.0036]\n",
      " [-0.0346  0.027   0.1143  0.0095 -0.01  ]\n",
      " [-0.0234 -0.0076  0.0198 -0.0054 -0.0034]]\n",
      "mean_state_value -0.006288619780311842\n",
      "episode 125/600\n",
      "p1 0.7008000000000001 p0 0.07479999999999998\n",
      "trajectorySteps 146\n",
      "[[ 5  7 10  3  0]\n",
      " [ 6  1  0  5 29]\n",
      " [ 3  2  0  2 26]\n",
      " [ 0  0  2  1 41]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️🔄🔄⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0135 -0.0308 -0.0186 -0.0051 -0.0085]\n",
      " [-0.042  -0.0151 -0.0194 -0.0203 -0.0047]\n",
      " [-0.0144 -0.0487  0.0147 -0.0143 -0.0038]\n",
      " [-0.0345  0.027   0.1159  0.0095 -0.0109]\n",
      " [-0.0233 -0.0076  0.0206 -0.0054 -0.0034]]\n",
      "mean_state_value -0.006254704913200491\n",
      "episode 126/600\n",
      "p1 0.7016 p0 0.07459999999999997\n",
      "trajectorySteps 66\n",
      "[[ 8  1  4  0  0]\n",
      " [ 4  1  1  1 11]\n",
      " [ 2  1  0  2 20]\n",
      " [ 1  1  2  0  1]\n",
      " [ 1  1  1  1  1]]\n",
      "🔄➡️➡️🔄⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0135 -0.0307 -0.0194 -0.0051 -0.0085]\n",
      " [-0.0426 -0.0152 -0.0194 -0.0202 -0.0047]\n",
      " [-0.0144 -0.0493  0.0148 -0.0143 -0.0038]\n",
      " [-0.0344  0.0263  0.1175  0.0096 -0.0109]\n",
      " [-0.0233 -0.0075  0.0215 -0.0054 -0.0033]]\n",
      "mean_state_value -0.006244052714280971\n",
      "episode 127/600\n",
      "p1 0.7024000000000001 p0 0.07439999999999998\n",
      "trajectorySteps 89\n",
      "[[ 4  1  1  6  9]\n",
      " [ 1  0  0  2 38]\n",
      " [ 2  0  0  0 15]\n",
      " [ 1  0  2  1  3]\n",
      " [ 0  0  0  0  3]]\n",
      "🔄➡️➡️🔄⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0136 -0.0306 -0.0193 -0.0052 -0.0087]\n",
      " [-0.0425 -0.0152 -0.0193 -0.0202 -0.0048]\n",
      " [-0.0143 -0.0492  0.0148 -0.0142 -0.0039]\n",
      " [-0.0343  0.0264  0.1185  0.0103 -0.0117]\n",
      " [-0.0232 -0.0075  0.0215 -0.0053 -0.0035]]\n",
      "mean_state_value -0.006206263778631255\n",
      "episode 128/600\n",
      "p1 0.7032 p0 0.07419999999999997\n",
      "trajectorySteps 35\n",
      "[[8 1 2 9 0]\n",
      " [2 0 0 1 0]\n",
      " [1 0 0 3 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 2]]\n",
      "🔄➡️➡️🔄⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0138 -0.0305 -0.0193 -0.0053 -0.0087]\n",
      " [-0.0425 -0.0151 -0.0193 -0.0202 -0.0048]\n",
      " [-0.0143 -0.049   0.0149 -0.0142 -0.0039]\n",
      " [-0.0342  0.0265  0.1187  0.0103 -0.0117]\n",
      " [-0.0232 -0.0075  0.0221 -0.0053 -0.0035]]\n",
      "mean_state_value -0.006151166146989161\n",
      "episode 129/600\n",
      "p1 0.7040000000000001 p0 0.07399999999999998\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 0 2 0 0]\n",
      " [1 1 1 0 0]]\n",
      "🔄➡️➡️🔄⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0138 -0.0305 -0.0192 -0.0053 -0.0086]\n",
      " [-0.0424 -0.0151 -0.0192 -0.0201 -0.0048]\n",
      " [-0.0143 -0.0489  0.0149 -0.0142 -0.0039]\n",
      " [-0.0342  0.0265  0.1196  0.0103 -0.0116]\n",
      " [-0.0238 -0.0075  0.0229 -0.0053 -0.0035]]\n",
      "mean_state_value -0.006073037037319362\n",
      "episode 130/600\n",
      "p1 0.7048000000000001 p0 0.07379999999999998\n",
      "trajectorySteps 39\n",
      "[[18  1  1  2  0]\n",
      " [ 2  0  0  1  0]\n",
      " [ 2  0  1  2  7]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0139 -0.0304 -0.0192 -0.0053 -0.0086]\n",
      " [-0.0423 -0.015  -0.0192 -0.0201 -0.0048]\n",
      " [-0.0143 -0.0488  0.0158 -0.0149 -0.0039]\n",
      " [-0.0341  0.0266  0.1213  0.0103 -0.0116]\n",
      " [-0.0238 -0.0074  0.0229 -0.0053 -0.0035]]\n",
      "mean_state_value -0.0059655469256764924\n",
      "episode 131/600\n",
      "p1 0.7056 p0 0.07359999999999997\n",
      "trajectorySteps 25\n",
      "[[ 1  1  1 10  0]\n",
      " [ 1  0  0  1  0]\n",
      " [ 1  0  0  1  1]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  1  2]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0138 -0.0303 -0.0191 -0.0053 -0.0086]\n",
      " [-0.0422 -0.015  -0.0191 -0.02   -0.0048]\n",
      " [-0.0142 -0.0486  0.0158 -0.0148 -0.0039]\n",
      " [-0.034   0.0267  0.1229  0.0112 -0.0116]\n",
      " [-0.0237 -0.0074  0.0229 -0.006  -0.0036]]\n",
      "mean_state_value -0.005858110209274034\n",
      "episode 132/600\n",
      "p1 0.7064000000000001 p0 0.07339999999999998\n",
      "trajectorySteps 126\n",
      "[[ 2  1  4 17  1]\n",
      " [ 2  0  0  2 27]\n",
      " [32  0  0  3 23]\n",
      " [ 1  0  2  0  3]\n",
      " [ 0  0  1  2  3]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0138 -0.0302 -0.0191 -0.0054 -0.0086]\n",
      " [-0.0421 -0.015  -0.0191 -0.0199 -0.0049]\n",
      " [-0.0144 -0.0485  0.0159 -0.0148 -0.0041]\n",
      " [-0.0339  0.0267  0.1246  0.0112 -0.0115]\n",
      " [-0.0236 -0.0074  0.0238 -0.006  -0.0036]]\n",
      "mean_state_value -0.005747717653785939\n",
      "episode 133/600\n",
      "p1 0.7072 p0 0.07319999999999997\n",
      "trajectorySteps 102\n",
      "[[ 5  3  5 24  2]\n",
      " [ 3  0  1  5 32]\n",
      " [ 4  3  1  3  5]\n",
      " [ 0  0  2  1  3]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️🔄⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0138 -0.0302 -0.0199 -0.0055 -0.0086]\n",
      " [-0.042  -0.0149 -0.019  -0.0199 -0.0052]\n",
      " [-0.0143 -0.0484  0.0159 -0.0162 -0.0041]\n",
      " [-0.0338  0.0268  0.1248  0.0118 -0.0116]\n",
      " [-0.0236 -0.0074  0.0238 -0.006  -0.0036]]\n",
      "mean_state_value -0.005798989766833495\n",
      "episode 134/600\n",
      "p1 0.7080000000000001 p0 0.07299999999999998\n",
      "trajectorySteps 128\n",
      "[[ 1  1  3 19  3]\n",
      " [ 3  0  0  3  6]\n",
      " [ 3  0  0  7 68]\n",
      " [ 1  0  2  1  7]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️🔄⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0138 -0.0302 -0.0198 -0.0056 -0.0086]\n",
      " [-0.042  -0.0149 -0.0189 -0.0199 -0.0052]\n",
      " [-0.0143 -0.0483  0.016  -0.0169 -0.0046]\n",
      " [-0.0337  0.0268  0.1265  0.0127 -0.0116]\n",
      " [-0.0235 -0.0074  0.0239 -0.006  -0.0036]]\n",
      "mean_state_value -0.0057186462948183745\n",
      "episode 135/600\n",
      "p1 0.7088000000000001 p0 0.07279999999999998\n",
      "trajectorySteps 28\n",
      "[[9 7 0 0 0]\n",
      " [2 0 1 0 0]\n",
      " [3 2 2 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️🔄⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0138 -0.0302 -0.0198 -0.0056 -0.0086]\n",
      " [-0.0418 -0.0149 -0.0196 -0.0198 -0.0052]\n",
      " [-0.0143 -0.0489  0.0161 -0.0168 -0.0046]\n",
      " [-0.0336  0.0269  0.1281  0.0127 -0.0116]\n",
      " [-0.0235 -0.0073  0.0239 -0.006  -0.0036]]\n",
      "mean_state_value -0.005668150833644262\n",
      "episode 136/600\n",
      "p1 0.7096 p0 0.07259999999999997\n",
      "trajectorySteps 167\n",
      "[[35 31  3  6  1]\n",
      " [ 7  1  0  0  3]\n",
      " [ 2  0  0  0  1]\n",
      " [ 0  0  2  0  8]\n",
      " [ 0  0  1  2 64]]\n",
      "🔄➡️➡️🔄⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0142 -0.0309 -0.0197 -0.0056 -0.0086]\n",
      " [-0.0419 -0.0148 -0.0196 -0.0198 -0.0053]\n",
      " [-0.0144 -0.0487  0.0162 -0.0168 -0.0046]\n",
      " [-0.0335  0.027   0.1284  0.0128 -0.0116]\n",
      " [-0.0234 -0.0073  0.0245 -0.0059 -0.0042]]\n",
      "mean_state_value -0.005677802556188097\n",
      "episode 137/600\n",
      "p1 0.7104000000000001 p0 0.07239999999999998\n",
      "trajectorySteps 78\n",
      "[[38  1  4  5  1]\n",
      " [ 8  1  0  0  1]\n",
      " [ 3  0  0  0  4]\n",
      " [ 0  0  2  0  3]\n",
      " [ 0  1  3  2  1]]\n",
      "🔄➡️➡️🔄⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬆️\n",
      "[[-0.0146 -0.0308 -0.0197 -0.0056 -0.0086]\n",
      " [-0.0426 -0.0148 -0.0195 -0.0197 -0.0053]\n",
      " [-0.0143 -0.0486  0.0162 -0.0167 -0.0047]\n",
      " [-0.0335  0.027   0.13    0.0128 -0.0116]\n",
      " [-0.0233 -0.0073  0.0246 -0.006  -0.0042]]\n",
      "mean_state_value -0.005629507725735843\n",
      "episode 138/600\n",
      "p1 0.7112 p0 0.07219999999999997\n",
      "trajectorySteps 62\n",
      "[[37  1  3  5  0]\n",
      " [ 4  0  0  1  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  1  3]\n",
      " [ 0  0  0  0  2]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0152 -0.0307 -0.0197 -0.0057 -0.0086]\n",
      " [-0.0426 -0.0148 -0.0195 -0.0197 -0.0053]\n",
      " [-0.0143 -0.0485  0.0163 -0.0167 -0.0047]\n",
      " [-0.0334  0.0271  0.1314  0.0137 -0.0123]\n",
      " [-0.0233 -0.0073  0.0247 -0.006  -0.0042]]\n",
      "mean_state_value -0.005560048277892731\n",
      "episode 139/600\n",
      "p1 0.7120000000000001 p0 0.07199999999999998\n",
      "trajectorySteps 39\n",
      "[[2 3 3 8 0]\n",
      " [1 1 0 2 1]\n",
      " [1 0 0 1 1]\n",
      " [0 2 2 0 1]\n",
      " [0 1 2 5 2]]\n",
      "🔄➡️➡️🔄⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0154 -0.0313 -0.0196 -0.0058 -0.0085]\n",
      " [-0.0426 -0.0147 -0.0194 -0.0196 -0.0053]\n",
      " [-0.0143 -0.0483  0.0163 -0.0166 -0.0047]\n",
      " [-0.0333  0.0273  0.1329  0.0137 -0.0123]\n",
      " [-0.0232 -0.008   0.024  -0.006  -0.0042]]\n",
      "mean_state_value -0.005560231340660486\n",
      "episode 140/600\n",
      "p1 0.7128000000000001 p0 0.07179999999999997\n",
      "trajectorySteps 43\n",
      "[[21  1  1  1  0]\n",
      " [ 2  0  0  1  0]\n",
      " [ 3  0  0  1  1]\n",
      " [ 3  0  2  0  2]\n",
      " [ 1  0  1  1  1]]\n",
      "➡️➡️➡️🔄⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0312 -0.0196 -0.0058 -0.0085]\n",
      " [-0.0425 -0.0147 -0.0194 -0.0196 -0.0052]\n",
      " [-0.0142 -0.0482  0.0163 -0.0166 -0.0046]\n",
      " [-0.0332  0.0273  0.1345  0.0137 -0.0123]\n",
      " [-0.0232 -0.0079  0.0249 -0.006  -0.0042]]\n",
      "mean_state_value -0.005430735609448567\n",
      "episode 141/600\n",
      "p1 0.7136 p0 0.07159999999999997\n",
      "trajectorySteps 47\n",
      "[[ 2  1  1  5  1]\n",
      " [ 2  0  0  0 12]\n",
      " [ 3  0  0  0 12]\n",
      " [ 1  0  2  0  2]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0312 -0.0195 -0.0058 -0.0085]\n",
      " [-0.0425 -0.0147 -0.0193 -0.0195 -0.0052]\n",
      " [-0.0143 -0.0481  0.0164 -0.0166 -0.0046]\n",
      " [-0.0332  0.0274  0.1362  0.0137 -0.0123]\n",
      " [-0.0231 -0.0079  0.0258 -0.006  -0.0041]]\n",
      "mean_state_value -0.005301540217471277\n",
      "episode 142/600\n",
      "p1 0.7144000000000001 p0 0.07139999999999998\n",
      "trajectorySteps 33\n",
      "[[0 1 1 1 0]\n",
      " [4 1 1 2 2]\n",
      " [8 2 0 0 2]\n",
      " [1 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0311 -0.0195 -0.0059 -0.0085]\n",
      " [-0.0424 -0.0146 -0.0192 -0.0201 -0.0052]\n",
      " [-0.0143 -0.0486  0.0164 -0.0165 -0.0046]\n",
      " [-0.0331  0.0275  0.1379  0.0138 -0.0123]\n",
      " [-0.023  -0.0079  0.0267 -0.0059 -0.0041]]\n",
      "mean_state_value -0.005215892436474605\n",
      "episode 143/600\n",
      "p1 0.7152000000000001 p0 0.07119999999999997\n",
      "trajectorySteps 73\n",
      "[[ 0  1  1  1  1]\n",
      " [16  1  0  2 10]\n",
      " [19  0  0  1 12]\n",
      " [ 4  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "🔄⏫️⏩️➡️⬆️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.031  -0.0194 -0.0059 -0.0084]\n",
      " [-0.043  -0.0146 -0.0192 -0.0201 -0.0052]\n",
      " [-0.0143 -0.0485  0.0165 -0.0165 -0.0048]\n",
      " [-0.033   0.0275  0.1393  0.0147 -0.013 ]\n",
      " [-0.023  -0.0079  0.0267 -0.0059 -0.0041]]\n",
      "mean_state_value -0.005151277180842347\n",
      "episode 144/600\n",
      "p1 0.7160000000000001 p0 0.07099999999999998\n",
      "trajectorySteps 500\n",
      "[[  2   3   2  20 200]\n",
      " [  1   1   0  17 203]\n",
      " [  5   0   1   7  29]\n",
      " [  0   0   2   0   3]\n",
      " [  0   0   1   1   2]]\n",
      "🔄➡️➡️🔄⬅️\n",
      "🔄⏫️⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0316 -0.0194 -0.006  -0.0106]\n",
      " [-0.0429 -0.0145 -0.0191 -0.02   -0.0063]\n",
      " [-0.0143 -0.0484  0.0165 -0.0172 -0.0051]\n",
      " [-0.0329  0.0276  0.1407  0.0147 -0.013 ]\n",
      " [-0.0229 -0.0078  0.0276 -0.0058 -0.0042]]\n",
      "mean_state_value -0.0052299347162735686\n",
      "episode 145/600\n",
      "p1 0.7168000000000001 p0 0.07079999999999997\n",
      "trajectorySteps 130\n",
      "[[ 2  4  3 34 10]\n",
      " [11  2  1  7 25]\n",
      " [24  0  0  1  2]\n",
      " [ 1  0  2  1  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "🔄⏪⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0323 -0.0193 -0.0063 -0.0107]\n",
      " [-0.0435 -0.0145 -0.0191 -0.0207 -0.0063]\n",
      " [-0.0146 -0.0482  0.0166 -0.0178 -0.0052]\n",
      " [-0.0329  0.0277  0.1417  0.0154 -0.013 ]\n",
      " [-0.0228 -0.0078  0.0276 -0.0058 -0.0042]]\n",
      "mean_state_value -0.005265390105759517\n",
      "episode 146/600\n",
      "p1 0.7176 p0 0.07059999999999997\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "🔄⏪⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0322 -0.0192 -0.0063 -0.0107]\n",
      " [-0.0433 -0.0145 -0.019  -0.0206 -0.0063]\n",
      " [-0.0146 -0.0488  0.0166 -0.0178 -0.0051]\n",
      " [-0.0328  0.0283  0.142   0.0154 -0.0129]\n",
      " [-0.0228 -0.0078  0.0277 -0.0058 -0.0042]]\n",
      "mean_state_value -0.005213412617935895\n",
      "episode 147/600\n",
      "p1 0.7184000000000001 p0 0.07039999999999998\n",
      "trajectorySteps 75\n",
      "[[ 6  0  0  0  0]\n",
      " [29  3  1  5 19]\n",
      " [ 4  0  0  0  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0321 -0.0192 -0.0063 -0.0106]\n",
      " [-0.0454 -0.0152 -0.019  -0.0206 -0.0065]\n",
      " [-0.0147 -0.0487  0.0167 -0.0177 -0.0051]\n",
      " [-0.0328  0.0284  0.143   0.0155 -0.0129]\n",
      " [-0.0227 -0.0078  0.0284 -0.0058 -0.0041]]\n",
      "mean_state_value -0.005234537496351408\n",
      "episode 148/600\n",
      "p1 0.7192000000000001 p0 0.07019999999999997\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 1 0 0 0]\n",
      " [3 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.032  -0.0191 -0.0063 -0.0106]\n",
      " [-0.0453 -0.0151 -0.0189 -0.0205 -0.0065]\n",
      " [-0.0148 -0.0486  0.0167 -0.0177 -0.0051]\n",
      " [-0.0334  0.0293  0.1444  0.0155 -0.0128]\n",
      " [-0.0227 -0.0078  0.0285 -0.0057 -0.0041]]\n",
      "mean_state_value -0.005132239413496101\n",
      "episode 149/600\n",
      "p1 0.7200000000000001 p0 0.06999999999999998\n",
      "trajectorySteps 225\n",
      "[[ 4  1  8 72 66]\n",
      " [ 5  0  0 12  3]\n",
      " [41  1  0  0  2]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  3  1]]\n",
      "🔄➡️➡️⬅️⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0158 -0.0319 -0.0193 -0.0067 -0.0113]\n",
      " [-0.0452 -0.0151 -0.0189 -0.0205 -0.0065]\n",
      " [-0.0152 -0.0485  0.0168 -0.0176 -0.0051]\n",
      " [-0.0333  0.0293  0.1461  0.0155 -0.0128]\n",
      " [-0.0226 -0.0077  0.0294 -0.0057 -0.0041]]\n",
      "mean_state_value -0.005066495702797793\n",
      "episode 150/600\n",
      "p1 0.7208000000000001 p0 0.06979999999999997\n",
      "trajectorySteps 196\n",
      "[[ 3  4 24 24  1]\n",
      " [11  1  3  2  0]\n",
      " [56  4  0  0  0]\n",
      " [54  3  2  0  0]\n",
      " [ 3  1  0  0  0]]\n",
      "🔄➡️🔄🔄⬅️\n",
      "⬆️⏪⏩️⬆️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0158 -0.0318 -0.0208 -0.007  -0.0113]\n",
      " [-0.0454 -0.0151 -0.0195 -0.0204 -0.0065]\n",
      " [-0.0154 -0.049   0.0168 -0.0176 -0.0051]\n",
      " [-0.0352  0.0296  0.1476  0.0156 -0.0128]\n",
      " [-0.0226 -0.0084  0.0294 -0.0057 -0.0041]]\n",
      "mean_state_value -0.005220106489027982\n",
      "episode 151/600\n",
      "p1 0.7216 p0 0.06959999999999997\n",
      "trajectorySteps 42\n",
      "[[ 1  1  4  0  0]\n",
      " [ 1  0  1  1  1]\n",
      " [18  5  0  0  2]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  2  1  1]]\n",
      "🔄➡️⬅️🔄⬅️\n",
      "⬆️⏪⏩️⬆️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0318 -0.0216 -0.0069 -0.0112]\n",
      " [-0.0453 -0.015  -0.0194 -0.0204 -0.0065]\n",
      " [-0.0154 -0.0489  0.0169 -0.0175 -0.0051]\n",
      " [-0.0351  0.0296  0.149   0.0156 -0.0127]\n",
      " [-0.0226 -0.0084  0.0303 -0.0057 -0.0041]]\n",
      "mean_state_value -0.0051184357919437104\n",
      "episode 152/600\n",
      "p1 0.7224000000000002 p0 0.06939999999999998\n",
      "trajectorySteps 181\n",
      "[[42 32 25 12  2]\n",
      " [ 7  1  1  2  0]\n",
      " [43  3  0  1  2]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0324 -0.0216 -0.0069 -0.0113]\n",
      " [-0.0453 -0.015  -0.0194 -0.021  -0.0065]\n",
      " [-0.0156 -0.0489  0.0169 -0.0175 -0.0051]\n",
      " [-0.035   0.0297  0.1507  0.0156 -0.0127]\n",
      " [-0.0225 -0.0084  0.0312 -0.0056 -0.0041]]\n",
      "mean_state_value -0.00507916200444053\n",
      "episode 153/600\n",
      "p1 0.7232000000000001 p0 0.06919999999999997\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [6 1 0 0 0]\n",
      " [4 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0323 -0.0216 -0.0069 -0.0112]\n",
      " [-0.0451 -0.0149 -0.0193 -0.0209 -0.0065]\n",
      " [-0.0156 -0.0494  0.0169 -0.0174 -0.0051]\n",
      " [-0.0349  0.0306  0.1521  0.0156 -0.0127]\n",
      " [-0.0224 -0.0083  0.0313 -0.0056 -0.0041]]\n",
      "mean_state_value -0.0049673475313975375\n",
      "episode 154/600\n",
      "p1 0.7240000000000001 p0 0.06899999999999998\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 6 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 1 1 0 0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0163 -0.0322 -0.0215 -0.0069 -0.0112]\n",
      " [-0.045  -0.0149 -0.0193 -0.0209 -0.0064]\n",
      " [-0.0156 -0.05    0.017  -0.0174 -0.0051]\n",
      " [-0.0348  0.03    0.1536  0.0157 -0.0126]\n",
      " [-0.0224 -0.0083  0.0322 -0.0056 -0.0041]]\n",
      "mean_state_value -0.0048830991733468145\n",
      "episode 155/600\n",
      "p1 0.7248000000000001 p0 0.06879999999999997\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [11  1  2  0  0]\n",
      " [10  0  0  0  0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0163 -0.0322 -0.0215 -0.0069 -0.0112]\n",
      " [-0.0449 -0.0149 -0.0192 -0.0208 -0.0064]\n",
      " [-0.0156 -0.0498  0.017  -0.0173 -0.0051]\n",
      " [-0.0354  0.0308  0.1546  0.0157 -0.0126]\n",
      " [-0.0225 -0.0082  0.0322 -0.0055 -0.0041]]\n",
      "mean_state_value -0.0048052852522011795\n",
      "episode 156/600\n",
      "p1 0.7256 p0 0.06859999999999997\n",
      "trajectorySteps 35\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [27  1  1  0  0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0163 -0.0321 -0.0214 -0.0069 -0.0112]\n",
      " [-0.0448 -0.0148 -0.0192 -0.0207 -0.0064]\n",
      " [-0.0156 -0.0497  0.0171 -0.0173 -0.0051]\n",
      " [-0.0354  0.0308  0.1563  0.0157 -0.0126]\n",
      " [-0.0235 -0.0082  0.0332 -0.0055 -0.004 ]]\n",
      "mean_state_value -0.004695542289922852\n",
      "episode 157/600\n",
      "p1 0.7264000000000002 p0 0.06839999999999997\n",
      "trajectorySteps 24\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [10  0  0  0  0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0162 -0.032  -0.0213 -0.0069 -0.0111]\n",
      " [-0.0446 -0.0148 -0.0191 -0.0207 -0.0064]\n",
      " [-0.0155 -0.0496  0.0171 -0.0172 -0.0051]\n",
      " [-0.036   0.0318  0.158   0.0158 -0.0125]\n",
      " [-0.0236 -0.0082  0.0332 -0.0055 -0.004 ]]\n",
      "mean_state_value -0.004577216554067524\n",
      "episode 158/600\n",
      "p1 0.7272000000000001 p0 0.06819999999999997\n",
      "trajectorySteps 25\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [19  1  0  0  0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0162 -0.0319 -0.0213 -0.0068 -0.0111]\n",
      " [-0.0445 -0.0147 -0.0191 -0.0206 -0.0064]\n",
      " [-0.0155 -0.0494  0.0172 -0.0172 -0.0051]\n",
      " [-0.0359  0.0327  0.1598  0.0158 -0.0125]\n",
      " [-0.0244 -0.0088  0.0333 -0.0055 -0.004 ]]\n",
      "mean_state_value -0.004485852971897022\n",
      "episode 159/600\n",
      "p1 0.7280000000000001 p0 0.06799999999999998\n",
      "trajectorySteps 82\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 6  1  0  0  0]\n",
      " [46  2  2  0  0]\n",
      " [24  0  0  0  0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0161 -0.0318 -0.0212 -0.0068 -0.0111]\n",
      " [-0.0444 -0.0147 -0.019  -0.0206 -0.0064]\n",
      " [-0.0155 -0.0493  0.0172 -0.0171 -0.005 ]\n",
      " [-0.0375  0.0337  0.1615  0.0158 -0.0124]\n",
      " [-0.0245 -0.0088  0.0333 -0.0055 -0.004 ]]\n",
      "mean_state_value -0.004407318344187886\n",
      "episode 160/600\n",
      "p1 0.7288000000000001 p0 0.06779999999999997\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [3 0 2 0 0]\n",
      " [4 1 1 0 0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0161 -0.0317 -0.0212 -0.0068 -0.011 ]\n",
      " [-0.0443 -0.0147 -0.0189 -0.0205 -0.0063]\n",
      " [-0.0154 -0.0491  0.0173 -0.0171 -0.005 ]\n",
      " [-0.0374  0.0337  0.1632  0.0158 -0.0124]\n",
      " [-0.0252 -0.0087  0.0342 -0.0055 -0.004 ]]\n",
      "mean_state_value -0.004284358736537382\n",
      "episode 161/600\n",
      "p1 0.7296 p0 0.06759999999999997\n",
      "trajectorySteps 43\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [19  1  2  0  0]\n",
      " [17  0  0  0  0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0161 -0.0316 -0.0211 -0.0068 -0.011 ]\n",
      " [-0.0442 -0.0146 -0.0189 -0.0204 -0.0063]\n",
      " [-0.0158 -0.049   0.0173 -0.017  -0.005 ]\n",
      " [-0.038   0.0347  0.165   0.0159 -0.0124]\n",
      " [-0.0254 -0.0087  0.0343 -0.0054 -0.004 ]]\n",
      "mean_state_value -0.004184178695170096\n",
      "episode 162/600\n",
      "p1 0.7304000000000002 p0 0.06739999999999997\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [4 1 2 0 0]\n",
      " [1 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.016  -0.0315 -0.021  -0.0068 -0.011 ]\n",
      " [-0.044  -0.0146 -0.0188 -0.0204 -0.0063]\n",
      " [-0.0157 -0.0495  0.0176 -0.017  -0.005 ]\n",
      " [-0.0379  0.0348  0.1667  0.0159 -0.0123]\n",
      " [-0.0253 -0.0087  0.0343 -0.0054 -0.004 ]]\n",
      "mean_state_value -0.004083254426076556\n",
      "episode 163/600\n",
      "p1 0.7312000000000001 p0 0.06719999999999997\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [4 1 1 0 0]\n",
      " [2 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬆️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.016  -0.0314 -0.021  -0.0068 -0.0109]\n",
      " [-0.0439 -0.0145 -0.0188 -0.0203 -0.0063]\n",
      " [-0.0157 -0.05    0.0185 -0.0169 -0.005 ]\n",
      " [-0.0378  0.0349  0.1683  0.0159 -0.0123]\n",
      " [-0.0253 -0.0086  0.0344 -0.0054 -0.004 ]]\n",
      "mean_state_value -0.003962676186407663\n",
      "episode 164/600\n",
      "p1 0.7320000000000001 p0 0.06699999999999998\n",
      "trajectorySteps 46\n",
      "[[ 2  2  1 19  2]\n",
      " [ 1  0  0  4  2]\n",
      " [ 2  0  0  2  2]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.016  -0.0313 -0.0209 -0.0069 -0.0109]\n",
      " [-0.0438 -0.0145 -0.0187 -0.0203 -0.0063]\n",
      " [-0.0157 -0.0499  0.0186 -0.0169 -0.005 ]\n",
      " [-0.0377  0.0349  0.17    0.016  -0.0123]\n",
      " [-0.0252 -0.0086  0.0353 -0.0054 -0.004 ]]\n",
      "mean_state_value -0.003816975311254142\n",
      "episode 165/600\n",
      "p1 0.7328000000000001 p0 0.06679999999999997\n",
      "trajectorySteps 148\n",
      "[[ 1  1  1 17  1]\n",
      " [45  2  0  0  2]\n",
      " [56  2  0  0  1]\n",
      " [10  0  2  0  1]\n",
      " [ 3  0  1  1  1]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0159 -0.0313 -0.0209 -0.0072 -0.0109]\n",
      " [-0.0452 -0.0145 -0.0187 -0.0202 -0.0064]\n",
      " [-0.0161 -0.0497  0.0186 -0.0168 -0.005 ]\n",
      " [-0.0377  0.035   0.1704  0.016  -0.0122]\n",
      " [-0.0252 -0.0086  0.036  -0.0053 -0.0039]]\n",
      "mean_state_value -0.003827139734767654\n",
      "episode 166/600\n",
      "p1 0.7336 p0 0.06659999999999996\n",
      "trajectorySteps 21\n",
      "[[1 1 1 4 0]\n",
      " [1 0 0 1 0]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 2 1 2]]\n",
      "➡️➡️🔄🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0159 -0.0312 -0.0209 -0.0072 -0.0109]\n",
      " [-0.0451 -0.0145 -0.0186 -0.0202 -0.0064]\n",
      " [-0.0161 -0.0496  0.0187 -0.0168 -0.0049]\n",
      " [-0.0376  0.0351  0.1722  0.016  -0.0122]\n",
      " [-0.0251 -0.0086  0.0368 -0.0053 -0.0039]]\n",
      "mean_state_value -0.003678784660267159\n",
      "episode 167/600\n",
      "p1 0.7344000000000002 p0 0.06639999999999997\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [2 0 0 0 0]]\n",
      "➡️➡️🔄🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0158 -0.0311 -0.0208 -0.0072 -0.0108]\n",
      " [-0.0449 -0.0144 -0.0186 -0.0201 -0.0064]\n",
      " [-0.0161 -0.0495  0.0187 -0.0167 -0.0049]\n",
      " [-0.0381  0.036   0.1739  0.016  -0.0122]\n",
      " [-0.0251 -0.0085  0.0369 -0.0053 -0.0039]]\n",
      "mean_state_value -0.003551918179898346\n",
      "episode 168/600\n",
      "p1 0.7352000000000001 p0 0.06619999999999997\n",
      "trajectorySteps 21\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [11  1  2  1  0]]\n",
      "➡️➡️🔄🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0158 -0.031  -0.0207 -0.0072 -0.0108]\n",
      " [-0.0448 -0.0144 -0.0185 -0.0201 -0.0063]\n",
      " [-0.016  -0.0493  0.0188 -0.0167 -0.0049]\n",
      " [-0.0382  0.0361  0.1757  0.0161 -0.0121]\n",
      " [-0.0259 -0.0085  0.0378 -0.0052 -0.0039]]\n",
      "mean_state_value -0.0034351005513448313\n",
      "episode 169/600\n",
      "p1 0.7360000000000001 p0 0.06599999999999998\n",
      "trajectorySteps 17\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 0 0 0 0]\n",
      " [6 1 2 0 0]\n",
      " [0 1 1 0 0]]\n",
      "➡️➡️🔄🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0158 -0.0309 -0.0207 -0.0072 -0.0108]\n",
      " [-0.0447 -0.0143 -0.0184 -0.02   -0.0063]\n",
      " [-0.0161 -0.0492  0.0188 -0.0166 -0.0049]\n",
      " [-0.0388  0.0355  0.1775  0.0161 -0.0121]\n",
      " [-0.0258 -0.0084  0.0388 -0.0052 -0.0039]]\n",
      "mean_state_value -0.003331783629672718\n",
      "episode 170/600\n",
      "p1 0.7368000000000001 p0 0.06579999999999997\n",
      "trajectorySteps 29\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 4  0  2  0  0]\n",
      " [20  1  1  0  0]]\n",
      "➡️➡️🔄🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0308 -0.0206 -0.0071 -0.0107]\n",
      " [-0.0446 -0.0143 -0.0184 -0.0199 -0.0063]\n",
      " [-0.0161 -0.049   0.0189 -0.0166 -0.0049]\n",
      " [-0.0387  0.0356  0.1785  0.0161 -0.0121]\n",
      " [-0.0267 -0.0084  0.0395 -0.0052 -0.0039]]\n",
      "mean_state_value -0.00325080964511547\n",
      "episode 171/600\n",
      "p1 0.7376 p0 0.06559999999999996\n",
      "trajectorySteps 17\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [10  0  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 2  1  0  0  0]]\n",
      "➡️➡️🔄🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0307 -0.0206 -0.0071 -0.0107]\n",
      " [-0.0444 -0.0143 -0.0183 -0.0199 -0.0063]\n",
      " [-0.0161 -0.0489  0.0189 -0.0165 -0.0049]\n",
      " [-0.0386  0.0366  0.1801  0.0162 -0.012 ]\n",
      " [-0.0273 -0.009   0.0396 -0.0052 -0.0039]]\n",
      "mean_state_value -0.0031593638773738057\n",
      "episode 172/600\n",
      "p1 0.7384000000000002 p0 0.06539999999999997\n",
      "trajectorySteps 39\n",
      "[[ 2  2 13  9  2]\n",
      " [ 1  0  0  1  0]\n",
      " [ 1  0  0  1  1]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  1  1]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0307 -0.0206 -0.0071 -0.0107]\n",
      " [-0.0443 -0.0142 -0.0183 -0.0198 -0.0063]\n",
      " [-0.0161 -0.0487  0.019  -0.0165 -0.0049]\n",
      " [-0.0385  0.0367  0.1819  0.0171 -0.012 ]\n",
      " [-0.0273 -0.009   0.0396 -0.0058 -0.0039]]\n",
      "mean_state_value -0.003037925749968689\n",
      "episode 173/600\n",
      "p1 0.7392000000000001 p0 0.06519999999999997\n",
      "trajectorySteps 119\n",
      "[[ 1  1  1 19  4]\n",
      " [ 1  0  0  0  3]\n",
      " [16  3  0  0  2]\n",
      " [29  0  2  0  3]\n",
      " [31  0  1  1  1]]\n",
      "➡️➡️➡️🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.0306 -0.0206 -0.0071 -0.0108]\n",
      " [-0.0442 -0.0142 -0.0182 -0.0198 -0.0063]\n",
      " [-0.0162 -0.0486  0.019  -0.0164 -0.0049]\n",
      " [-0.0384  0.0368  0.1836  0.0171 -0.012 ]\n",
      " [-0.0274 -0.0089  0.0406 -0.0057 -0.0039]]\n",
      "mean_state_value -0.002903746368042799\n",
      "episode 174/600\n",
      "p1 0.7400000000000001 p0 0.06499999999999997\n",
      "trajectorySteps 61\n",
      "[[ 1  1  2 16  2]\n",
      " [ 1  0  0  0  1]\n",
      " [27  0  0  0  1]\n",
      " [ 2  0  2  0  2]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️🔄🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.0305 -0.0205 -0.0073 -0.0107]\n",
      " [-0.044  -0.0141 -0.0182 -0.0197 -0.0063]\n",
      " [-0.0163 -0.0484  0.0191 -0.0164 -0.0049]\n",
      " [-0.0383  0.0368  0.1854  0.0172 -0.012 ]\n",
      " [-0.0273 -0.0089  0.0416 -0.0057 -0.0039]]\n",
      "mean_state_value -0.002764658618392462\n",
      "episode 175/600\n",
      "p1 0.7408000000000001 p0 0.06479999999999997\n",
      "trajectorySteps 33\n",
      "[[ 1  2 14  0  0]\n",
      " [ 1  0  1  1  0]\n",
      " [ 1  0  0  1  1]\n",
      " [ 0  0  2  0  3]\n",
      " [ 0  0  1  1  3]]\n",
      "➡️⬅️🔄🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.0306 -0.0212 -0.0073 -0.0107]\n",
      " [-0.0439 -0.0141 -0.0181 -0.0196 -0.0063]\n",
      " [-0.0163 -0.0483  0.0192 -0.0163 -0.0049]\n",
      " [-0.0382  0.0369  0.1872  0.0172 -0.0121]\n",
      " [-0.0273 -0.0089  0.0425 -0.0056 -0.004 ]]\n",
      "mean_state_value -0.0026460849049425835\n",
      "episode 176/600\n",
      "p1 0.7416 p0 0.06459999999999996\n",
      "trajectorySteps 99\n",
      "[[19 18  0  0  0]\n",
      " [ 1  1  0  0  0]\n",
      " [45  6  1  0  0]\n",
      " [ 6  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄🔄🔄🔄⬅️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0159 -0.0314 -0.0211 -0.0073 -0.0107]\n",
      " [-0.0438 -0.0141 -0.0181 -0.0196 -0.0063]\n",
      " [-0.0164 -0.0488  0.0201 -0.0163 -0.0049]\n",
      " [-0.0382  0.037   0.189   0.0172 -0.012 ]\n",
      " [-0.0272 -0.0088  0.0426 -0.0056 -0.004 ]]\n",
      "mean_state_value -0.0025748744624434074\n",
      "episode 177/600\n",
      "p1 0.7424000000000002 p0 0.06439999999999997\n",
      "trajectorySteps 43\n",
      "[[ 3 12  4  2  0]\n",
      " [ 2  1  0  1  0]\n",
      " [ 3  1  1  6  5]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️🔄🔄⬅️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0159 -0.032  -0.0211 -0.0073 -0.0107]\n",
      " [-0.0437 -0.0141 -0.018  -0.0195 -0.0062]\n",
      " [-0.0164 -0.0487  0.0209 -0.0169 -0.0049]\n",
      " [-0.0381  0.0371  0.1901  0.0173 -0.012 ]\n",
      " [-0.0271 -0.0088  0.0427 -0.0056 -0.0039]]\n",
      "mean_state_value -0.002512109350624236\n",
      "episode 178/600\n",
      "p1 0.7432000000000001 p0 0.06419999999999997\n",
      "trajectorySteps 34\n",
      "[[12  1  3  0  0]\n",
      " [ 8  3  1  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️🔄⬅️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0161 -0.032  -0.0217 -0.0072 -0.0106]\n",
      " [-0.0455 -0.0142 -0.0186 -0.0195 -0.0062]\n",
      " [-0.0164 -0.0485  0.021  -0.0168 -0.0048]\n",
      " [-0.0379  0.0372  0.1905  0.0173 -0.012 ]\n",
      " [-0.027  -0.0088  0.0427 -0.0055 -0.0039]]\n",
      "mean_state_value -0.0025935445523925993\n",
      "episode 179/600\n",
      "p1 0.7440000000000001 p0 0.06399999999999997\n",
      "trajectorySteps 18\n",
      "[[3 1 1 6 0]\n",
      " [1 0 0 1 0]\n",
      " [1 0 0 1 0]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️🔄⬅️\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.016  -0.0319 -0.0216 -0.0074 -0.0106]\n",
      " [-0.0454 -0.0142 -0.0185 -0.0194 -0.0062]\n",
      " [-0.0164 -0.0484  0.021  -0.0174 -0.0048]\n",
      " [-0.0378  0.0372  0.1919  0.0183 -0.0119]\n",
      " [-0.027  -0.0088  0.0428 -0.0055 -0.0039]]\n",
      "mean_state_value -0.002477267000081422\n",
      "episode 180/600\n",
      "p1 0.7448000000000001 p0 0.06379999999999997\n",
      "trajectorySteps 67\n",
      "[[22  3  2 13  2]\n",
      " [ 6  1  0  1  1]\n",
      " [ 3  1  0  1  4]\n",
      " [ 0  0  2  1  4]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️🔄⬅️\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0163 -0.0324 -0.0216 -0.0075 -0.0106]\n",
      " [-0.0454 -0.0142 -0.0185 -0.0194 -0.0062]\n",
      " [-0.0164 -0.0482  0.0211 -0.0173 -0.0049]\n",
      " [-0.0377  0.0373  0.1934  0.0192 -0.0126]\n",
      " [-0.0269 -0.0087  0.0428 -0.0055 -0.0039]]\n",
      "mean_state_value -0.0024077886639798634\n",
      "episode 181/600\n",
      "p1 0.7456 p0 0.06359999999999996\n",
      "trajectorySteps 53\n",
      "[[ 7  1  1 25  3]\n",
      " [ 3  0  0  1  4]\n",
      " [ 1  0  0  0  3]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬅️⬅️\n",
      "⬆️⏬⏩️⬆️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0163 -0.0323 -0.0215 -0.0075 -0.0105]\n",
      " [-0.0453 -0.0141 -0.0184 -0.0193 -0.0062]\n",
      " [-0.0163 -0.0481  0.0211 -0.0173 -0.0049]\n",
      " [-0.0376  0.0374  0.1949  0.0202 -0.0132]\n",
      " [-0.0268 -0.0087  0.0429 -0.0055 -0.0039]]\n",
      "mean_state_value -0.002292021204772331\n",
      "episode 182/600\n",
      "p1 0.7464000000000002 p0 0.06339999999999997\n",
      "trajectorySteps 342\n",
      "[[72 11 84 89  8]\n",
      " [12  4 11 12  3]\n",
      " [ 7  4  0  1 18]\n",
      " [ 1  0  2  1  2]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️➡️⬅️\n",
      "⬇️⏬⏫️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.033  -0.0265 -0.008  -0.0106]\n",
      " [-0.0459 -0.0141 -0.0203 -0.0212 -0.0063]\n",
      " [-0.0163 -0.0479  0.0212 -0.0172 -0.005 ]\n",
      " [-0.0375  0.0375  0.1953  0.0208 -0.0138]\n",
      " [-0.0267 -0.0087  0.0429 -0.0055 -0.0039]]\n",
      "mean_state_value -0.002697589203545984\n",
      "episode 183/600\n",
      "p1 0.7472000000000001 p0 0.06319999999999996\n",
      "trajectorySteps 22\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [7 1 0 0 0]\n",
      " [7 1 2 0 0]\n",
      " [1 2 1 0 0]]\n",
      "➡️⬅️➡️➡️⬅️\n",
      "⬇️⏬⏫️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0329 -0.0264 -0.008  -0.0106]\n",
      " [-0.0457 -0.014  -0.0203 -0.0211 -0.0063]\n",
      " [-0.0164 -0.0478  0.0212 -0.0172 -0.005 ]\n",
      " [-0.0381  0.0376  0.1971  0.0209 -0.0137]\n",
      " [-0.0273 -0.0092  0.0439 -0.0054 -0.0039]]\n",
      "mean_state_value -0.0026143090649745266\n",
      "episode 184/600\n",
      "p1 0.7480000000000001 p0 0.06299999999999997\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️➡️⬅️\n",
      "⬇️⏬⏫️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0328 -0.0263 -0.0079 -0.0105]\n",
      " [-0.0456 -0.014  -0.0202 -0.0211 -0.0062]\n",
      " [-0.0164 -0.0476  0.0213 -0.0171 -0.005 ]\n",
      " [-0.0386  0.0386  0.1989  0.0209 -0.0137]\n",
      " [-0.0272 -0.0092  0.044  -0.0054 -0.0039]]\n",
      "mean_state_value -0.00247546923836054\n",
      "episode 185/600\n",
      "p1 0.7488000000000001 p0 0.06279999999999997\n",
      "trajectorySteps 260\n",
      "[[69 48  6 17  9]\n",
      " [44  1  1  2  1]\n",
      " [42  3  1  5  6]\n",
      " [ 1  0  2  0  2]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️🔄\n",
      "⬆️⏬⏫️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0174 -0.0331 -0.0262 -0.0081 -0.0106]\n",
      " [-0.0465 -0.014  -0.0201 -0.0216 -0.0062]\n",
      " [-0.0167 -0.0475  0.0219 -0.0177 -0.005 ]\n",
      " [-0.0385  0.0387  0.1994  0.0209 -0.0136]\n",
      " [-0.0271 -0.0092  0.044  -0.0054 -0.0039]]\n",
      "mean_state_value -0.002534829373125743\n",
      "episode 186/600\n",
      "p1 0.7496 p0 0.06259999999999996\n",
      "trajectorySteps 120\n",
      "[[ 3  4  2  4 49]\n",
      " [ 2  1  0  1 18]\n",
      " [ 3  2  0  2 21]\n",
      " [ 0  0  2  0  3]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬆️⏬⏫️⬇️⬅️\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0176 -0.0336 -0.0262 -0.0081 -0.011 ]\n",
      " [-0.0464 -0.0139 -0.0201 -0.0215 -0.0063]\n",
      " [-0.0168 -0.0474  0.022  -0.0176 -0.005 ]\n",
      " [-0.0383  0.0387  0.2005  0.021  -0.0136]\n",
      " [-0.0271 -0.0091  0.0448 -0.0053 -0.0038]]\n",
      "mean_state_value -0.0024730372154891478\n",
      "episode 187/600\n",
      "p1 0.7504000000000002 p0 0.06239999999999997\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 2 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬆️⏬⏫️⬇️⬅️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0175 -0.0335 -0.0262 -0.0081 -0.011 ]\n",
      " [-0.0462 -0.0139 -0.02   -0.0215 -0.0062]\n",
      " [-0.0168 -0.0478  0.022  -0.0176 -0.005 ]\n",
      " [-0.0382  0.0394  0.2009  0.021  -0.0135]\n",
      " [-0.027  -0.0091  0.0449 -0.0053 -0.0038]]\n",
      "mean_state_value -0.002404254316458299\n",
      "episode 188/600\n",
      "p1 0.7512000000000001 p0 0.062199999999999964\n",
      "trajectorySteps 36\n",
      "[[ 4  1  1  0  0]\n",
      " [ 3  1  1  0  0]\n",
      " [17  2  2  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄🔄➡️➡️⬅️\n",
      "⬆️⏫️⏫️⬇️⬅️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0176 -0.0335 -0.0267 -0.008  -0.011 ]\n",
      " [-0.0463 -0.0139 -0.0206 -0.0214 -0.0062]\n",
      " [-0.0169 -0.0489  0.0222 -0.0175 -0.005 ]\n",
      " [-0.0381  0.0395  0.202   0.021  -0.0135]\n",
      " [-0.0269 -0.0091  0.0449 -0.0053 -0.0038]]\n",
      "mean_state_value -0.0024197251351545847\n",
      "episode 189/600\n",
      "p1 0.7520000000000001 p0 0.061999999999999965\n",
      "trajectorySteps 86\n",
      "[[ 0  0  2 13 17]\n",
      " [ 0  0  1  8 10]\n",
      " [14  1  2  8  8]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄🔄➡️🔄⬅️\n",
      "⬆️⏫️⏫️⬆️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0175 -0.0334 -0.0266 -0.0081 -0.0112]\n",
      " [-0.0461 -0.0139 -0.0205 -0.022  -0.0063]\n",
      " [-0.0171 -0.0493  0.0232 -0.0181 -0.0051]\n",
      " [-0.038   0.0395  0.2039  0.0211 -0.0135]\n",
      " [-0.0268 -0.009   0.045  -0.0053 -0.0038]]\n",
      "mean_state_value -0.0023635898531806374\n",
      "episode 190/600\n",
      "p1 0.7528000000000001 p0 0.061799999999999966\n",
      "trajectorySteps 829\n",
      "[[182 130  32 347  20]\n",
      " [ 10  10   3  31  32]\n",
      " [ 21   1   0   2   0]\n",
      " [  5   1   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️🔄➡️⬇️\n",
      "⬆️⏬⏫️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0196 -0.0387 -0.0272 -0.0098 -0.0114]\n",
      " [-0.0466 -0.0146 -0.0204 -0.0231 -0.0065]\n",
      " [-0.0174 -0.0492  0.0233 -0.0181 -0.005 ]\n",
      " [-0.0385  0.0406  0.2057  0.0211 -0.0134]\n",
      " [-0.0268 -0.009   0.045  -0.0052 -0.0038]]\n",
      "mean_state_value -0.0027490000270615545\n",
      "episode 191/600\n",
      "p1 0.7536000000000002 p0 0.06159999999999997\n",
      "trajectorySteps 39\n",
      "[[2 2 6 2 1]\n",
      " [2 0 1 0 2]\n",
      " [4 0 0 0 6]\n",
      " [0 0 2 0 6]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️⬅️➡️⬇️\n",
      "⬆️⏬⏫️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0195 -0.0385 -0.0279 -0.0098 -0.0114]\n",
      " [-0.0464 -0.0146 -0.0204 -0.0231 -0.0065]\n",
      " [-0.0175 -0.049   0.0233 -0.018  -0.0051]\n",
      " [-0.0384  0.0406  0.2075  0.0211 -0.0134]\n",
      " [-0.0267 -0.009   0.046  -0.0052 -0.0038]]\n",
      "mean_state_value -0.002617672668977555\n",
      "episode 192/600\n",
      "p1 0.7544000000000002 p0 0.06139999999999997\n",
      "trajectorySteps 41\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  1  0  0  0]\n",
      " [29  4  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 0  1  1  0  0]]\n",
      "➡️➡️⬅️➡️⬇️\n",
      "⬆️⏪⏫️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0195 -0.0384 -0.0278 -0.0098 -0.0113]\n",
      " [-0.0463 -0.0146 -0.0203 -0.023  -0.0065]\n",
      " [-0.0174 -0.0501  0.0234 -0.018  -0.0051]\n",
      " [-0.0383  0.0401  0.2094  0.0212 -0.0134]\n",
      " [-0.0266 -0.0089  0.047  -0.0052 -0.0038]]\n",
      "mean_state_value -0.0025212446934854043\n",
      "episode 193/600\n",
      "p1 0.7552000000000001 p0 0.06119999999999996\n",
      "trajectorySteps 20\n",
      "[[1 1 1 0 0]\n",
      " [3 0 1 0 0]\n",
      " [6 1 1 0 0]\n",
      " [3 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️⬅️➡️⬇️\n",
      "⬆️⏪⏫️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0194 -0.0384 -0.0283 -0.0097 -0.0113]\n",
      " [-0.0462 -0.0145 -0.0209 -0.0229 -0.0065]\n",
      " [-0.0175 -0.0499  0.0241 -0.0179 -0.0051]\n",
      " [-0.0384  0.0402  0.2099  0.0212 -0.0134]\n",
      " [-0.0265 -0.0089  0.0471 -0.0051 -0.0038]]\n",
      "mean_state_value -0.002483879127262718\n",
      "episode 194/600\n",
      "p1 0.7560000000000001 p0 0.060999999999999964\n",
      "trajectorySteps 107\n",
      "[[22 13  1  1  6]\n",
      " [ 4  0  0  0 27]\n",
      " [ 5  0  0  0 20]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️⬅️➡️⬇️\n",
      "⬆️⏪⏫️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0197 -0.0385 -0.0282 -0.0097 -0.0113]\n",
      " [-0.0461 -0.0145 -0.0208 -0.0228 -0.0067]\n",
      " [-0.0175 -0.0498  0.0242 -0.0178 -0.0052]\n",
      " [-0.0382  0.0403  0.2117  0.0212 -0.0133]\n",
      " [-0.0264 -0.0088  0.0481 -0.0051 -0.0037]]\n",
      "mean_state_value -0.002348348115483031\n",
      "episode 195/600\n",
      "p1 0.7568000000000001 p0 0.060799999999999965\n",
      "trajectorySteps 17\n",
      "[[1 3 3 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️⬅️➡️⬇️\n",
      "⬆️⏪⏫️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0196 -0.0384 -0.0281 -0.0097 -0.0112]\n",
      " [-0.046  -0.0145 -0.0207 -0.0228 -0.0067]\n",
      " [-0.0174 -0.0496  0.0242 -0.0184 -0.0052]\n",
      " [-0.0381  0.0404  0.2128  0.022  -0.0133]\n",
      " [-0.0264 -0.0088  0.0482 -0.005  -0.0037]]\n",
      "mean_state_value -0.0022409074193063933\n",
      "episode 196/600\n",
      "p1 0.7576000000000002 p0 0.060599999999999966\n",
      "trajectorySteps 38\n",
      "[[1 9 8 1 1]\n",
      " [2 0 0 0 1]\n",
      " [4 0 0 0 4]\n",
      " [0 0 2 1 1]\n",
      " [0 0 0 1 2]]\n",
      "➡️➡️⬅️➡️⬇️\n",
      "⬆️⏪⏫️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0196 -0.0383 -0.0281 -0.0097 -0.0112]\n",
      " [-0.0459 -0.0144 -0.0206 -0.0227 -0.0066]\n",
      " [-0.0176 -0.0495  0.0243 -0.0183 -0.0051]\n",
      " [-0.038   0.0405  0.2144  0.023  -0.0132]\n",
      " [-0.0263 -0.0088  0.0482 -0.0056 -0.0038]]\n",
      "mean_state_value -0.0021219947669993753\n",
      "episode 197/600\n",
      "p1 0.7584000000000002 p0 0.06039999999999997\n",
      "trajectorySteps 126\n",
      "[[ 8 17 18  2  1]\n",
      " [ 6  3  2  1  6]\n",
      " [ 2  0  2  1 52]\n",
      " [ 0  0  2  0  3]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏫️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0199 -0.04   -0.0295 -0.0097 -0.0112]\n",
      " [-0.0458 -0.0144 -0.0209 -0.0226 -0.0066]\n",
      " [-0.0176 -0.0493  0.0247 -0.0189 -0.0054]\n",
      " [-0.0379  0.0405  0.2158  0.023  -0.0132]\n",
      " [-0.0262 -0.0087  0.0483 -0.0056 -0.0038]]\n",
      "mean_state_value -0.0021918041300501663\n",
      "episode 198/600\n",
      "p1 0.7592000000000001 p0 0.06019999999999996\n",
      "trajectorySteps 1024\n",
      "[[  3   2   2   5  26]\n",
      " [  2   1   2  30 429]\n",
      " [  3   0   1  24 432]\n",
      " [  2   0   2   3  40]\n",
      " [  0   0   0   0  15]]\n",
      "➡️➡️➡️➡️⬅️\n",
      "⬇️⏪⏫️⬇️🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0199 -0.0399 -0.0294 -0.0097 -0.0113]\n",
      " [-0.0457 -0.0143 -0.022  -0.0238 -0.0084]\n",
      " [-0.0176 -0.0491  0.0257 -0.0196 -0.0068]\n",
      " [-0.0378  0.0406  0.2174  0.0225 -0.0137]\n",
      " [-0.0261 -0.0087  0.0484 -0.0056 -0.0041]]\n",
      "mean_state_value -0.0023577181538287036\n",
      "episode 199/600\n",
      "p1 0.7600000000000001 p0 0.05999999999999996\n",
      "trajectorySteps 162\n",
      "[[  1   2   1   6   7]\n",
      " [ 16   2   0   2  13]\n",
      " [100   6   1   1   0]\n",
      " [  2   0   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️➡️⬅️\n",
      "⬇️⏪⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0199 -0.0397 -0.0293 -0.0097 -0.0113]\n",
      " [-0.0464 -0.0143 -0.0219 -0.0238 -0.0084]\n",
      " [-0.0179 -0.0496  0.0268 -0.0201 -0.0068]\n",
      " [-0.0379  0.0407  0.2192  0.0225 -0.0137]\n",
      " [-0.0261 -0.0087  0.0484 -0.0056 -0.0041]]\n",
      "mean_state_value -0.002293565444635004\n",
      "episode 200/600\n",
      "p1 0.7608000000000001 p0 0.059799999999999964\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [1 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬅️\n",
      "⬇️⏪⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0199 -0.0396 -0.0292 -0.0097 -0.0113]\n",
      " [-0.0463 -0.0143 -0.0219 -0.0237 -0.0084]\n",
      " [-0.0178 -0.0494  0.0268 -0.02   -0.0067]\n",
      " [-0.0383  0.0414  0.2198  0.0225 -0.0137]\n",
      " [-0.0262 -0.0087  0.0485 -0.0055 -0.0041]]\n",
      "mean_state_value -0.002218633908862998\n",
      "episode 201/600\n",
      "p1 0.7616000000000002 p0 0.059599999999999966\n",
      "trajectorySteps 19\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [10  1  1  0  0]]\n",
      "➡️➡️➡️➡️⬅️\n",
      "⬇️⏪⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0198 -0.0395 -0.0291 -0.0096 -0.0112]\n",
      " [-0.0461 -0.0142 -0.0218 -0.0237 -0.0083]\n",
      " [-0.0178 -0.0493  0.0269 -0.0199 -0.0067]\n",
      " [-0.0382  0.0415  0.2213  0.0226 -0.0136]\n",
      " [-0.027  -0.0086  0.0495 -0.0055 -0.0041]]\n",
      "mean_state_value -0.0020936860052658372\n",
      "episode 202/600\n",
      "p1 0.7624000000000002 p0 0.05939999999999997\n",
      "trajectorySteps 51\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [ 9  2  0  0  0]\n",
      " [17  0  2  0  0]\n",
      " [16  1  1  0  0]]\n",
      "➡️➡️➡️➡️⬅️\n",
      "⬆️⏪⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0198 -0.0394 -0.029  -0.0096 -0.0112]\n",
      " [-0.0461 -0.0142 -0.0217 -0.0236 -0.0083]\n",
      " [-0.0179 -0.0491  0.027  -0.0199 -0.0067]\n",
      " [-0.0382  0.0415  0.2231  0.0226 -0.0136]\n",
      " [-0.0275 -0.0085  0.0505 -0.0055 -0.004 ]]\n",
      "mean_state_value -0.0019538511975017094\n",
      "episode 203/600\n",
      "p1 0.7632000000000001 p0 0.05919999999999996\n",
      "trajectorySteps 361\n",
      "[[ 1  3 12 95 96]\n",
      " [ 1  1  2  4 11]\n",
      " [35  4  0  2  1]\n",
      " [29  0  2  0  0]\n",
      " [60  1  1  0  0]]\n",
      "➡️➡️🔄⬇️⬇️\n",
      "⬆️⏪⏫️➡️🔄\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0197 -0.0392 -0.0295 -0.0102 -0.0119]\n",
      " [-0.0459 -0.0141 -0.0222 -0.0241 -0.0083]\n",
      " [-0.0181 -0.049   0.027  -0.0198 -0.0067]\n",
      " [-0.0384  0.0416  0.2246  0.0227 -0.0135]\n",
      " [-0.0283 -0.0084  0.0516 -0.0055 -0.004 ]]\n",
      "mean_state_value -0.001976925630542708\n",
      "episode 204/600\n",
      "p1 0.7640000000000001 p0 0.05899999999999996\n",
      "trajectorySteps 36\n",
      "[[1 2 6 0 1]\n",
      " [1 1 2 1 6]\n",
      " [1 6 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏫️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0197 -0.0391 -0.0302 -0.0101 -0.0118]\n",
      " [-0.0458 -0.0141 -0.0227 -0.024  -0.0083]\n",
      " [-0.018  -0.0495  0.0271 -0.0198 -0.0067]\n",
      " [-0.0383  0.0417  0.2265  0.0227 -0.0135]\n",
      " [-0.0282 -0.0084  0.0526 -0.0054 -0.004 ]]\n",
      "mean_state_value -0.0018812552200487213\n",
      "episode 205/600\n",
      "p1 0.7648000000000001 p0 0.05879999999999996\n",
      "trajectorySteps 64\n",
      "[[ 1  1  2  2  4]\n",
      " [ 1  0  0  2 33]\n",
      " [ 2  0  0  0  2]\n",
      " [ 4  0  2  0  1]\n",
      " [ 3  0  2  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏫️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0196 -0.039  -0.0301 -0.0101 -0.0118]\n",
      " [-0.0456 -0.0141 -0.0227 -0.0239 -0.0084]\n",
      " [-0.018  -0.0494  0.0272 -0.0197 -0.0067]\n",
      " [-0.0382  0.0418  0.2284  0.0227 -0.0134]\n",
      " [-0.0281 -0.0083  0.0536 -0.0054 -0.004 ]]\n",
      "mean_state_value -0.0017198702266823928\n",
      "episode 206/600\n",
      "p1 0.7656000000000002 p0 0.058599999999999965\n",
      "trajectorySteps 94\n",
      "[[ 6  2  4  5  2]\n",
      " [ 3  1  0  5 55]\n",
      " [ 3  0  0  0  2]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏫️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.02   -0.0395 -0.0301 -0.0101 -0.0118]\n",
      " [-0.0455 -0.014  -0.0226 -0.0238 -0.0088]\n",
      " [-0.018  -0.0492  0.0272 -0.0196 -0.0066]\n",
      " [-0.0381  0.0419  0.2303  0.0228 -0.0134]\n",
      " [-0.0281 -0.0083  0.0546 -0.0053 -0.004 ]]\n",
      "mean_state_value -0.0016020063587784733\n",
      "episode 207/600\n",
      "p1 0.7664000000000002 p0 0.058399999999999966\n",
      "trajectorySteps 80\n",
      "[[18  3  1  1  0]\n",
      " [23  2  0  1  8]\n",
      " [ 2  0  0  1  3]\n",
      " [ 6  0  2  0  1]\n",
      " [ 5  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏫️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0399 -0.03   -0.0101 -0.0118]\n",
      " [-0.0461 -0.014  -0.0225 -0.0238 -0.0088]\n",
      " [-0.0179 -0.0491  0.0273 -0.0196 -0.0067]\n",
      " [-0.038   0.042   0.2322  0.0228 -0.0133]\n",
      " [-0.028  -0.0083  0.0556 -0.0052 -0.004 ]]\n",
      "mean_state_value -0.0014979305340101418\n",
      "episode 208/600\n",
      "p1 0.7672000000000001 p0 0.05819999999999996\n",
      "trajectorySteps 26\n",
      "[[2 1 2 2 0]\n",
      " [1 0 0 1 4]\n",
      " [3 1 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏫️➡️🔄\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0398 -0.0299 -0.01   -0.0117]\n",
      " [-0.0461 -0.0139 -0.0224 -0.0237 -0.0088]\n",
      " [-0.0179 -0.0489  0.0274 -0.0195 -0.0066]\n",
      " [-0.0378  0.0421  0.2337  0.0228 -0.0133]\n",
      " [-0.0279 -0.0083  0.0567 -0.0051 -0.004 ]]\n",
      "mean_state_value -0.0013412287074645295\n",
      "episode 209/600\n",
      "p1 0.7680000000000001 p0 0.05799999999999996\n",
      "trajectorySteps 61\n",
      "[[ 1  3  4  3  3]\n",
      " [ 1  0  2  6 25]\n",
      " [ 2  0  0  0  1]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  2  1  3]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏫️➡️🔄\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0204 -0.0397 -0.0299 -0.01   -0.0117]\n",
      " [-0.0459 -0.0139 -0.0229 -0.0242 -0.0088]\n",
      " [-0.0179 -0.0487  0.0274 -0.0194 -0.0066]\n",
      " [-0.0377  0.0421  0.2356  0.0229 -0.0133]\n",
      " [-0.0278 -0.0082  0.0577 -0.0051 -0.0039]]\n",
      "mean_state_value -0.0012191623912663457\n",
      "episode 210/600\n",
      "p1 0.7688000000000001 p0 0.05779999999999996\n",
      "trajectorySteps 58\n",
      "[[ 2  2  1  1  0]\n",
      " [ 2  1  0  1 23]\n",
      " [ 2  0  0  0  2]\n",
      " [ 8  0  2  1  2]\n",
      " [ 7  0  0  0  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏫️➡️🔄\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0206 -0.0401 -0.0298 -0.01   -0.0117]\n",
      " [-0.0458 -0.0139 -0.0229 -0.0241 -0.0091]\n",
      " [-0.0179 -0.0486  0.0275 -0.0194 -0.0067]\n",
      " [-0.0376  0.0422  0.2375  0.0239 -0.0138]\n",
      " [-0.0278 -0.0082  0.0577 -0.0051 -0.0039]]\n",
      "mean_state_value -0.0011116855246312931\n",
      "episode 211/600\n",
      "p1 0.7696000000000002 p0 0.057599999999999964\n",
      "trajectorySteps 64\n",
      "[[ 1  4  4  1  2]\n",
      " [ 1  0  2  4 33]\n",
      " [ 1  0  0  1  3]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏫️⬇️🔄\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.04   -0.0303 -0.0099 -0.0116]\n",
      " [-0.0456 -0.0138 -0.0234 -0.0241 -0.0091]\n",
      " [-0.0178 -0.0484  0.0276 -0.0193 -0.0066]\n",
      " [-0.0375  0.0423  0.2391  0.024  -0.0137]\n",
      " [-0.0277 -0.0082  0.0588 -0.005  -0.0039]]\n",
      "mean_state_value -0.0010000236012156654\n",
      "episode 212/600\n",
      "p1 0.7704000000000002 p0 0.057399999999999965\n",
      "trajectorySteps 199\n",
      "[[75  9  0  0  0]\n",
      " [76  8  0  0  0]\n",
      " [ 9  5  1  0  0]\n",
      " [ 8  0  2  0  0]\n",
      " [ 6  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏫️⬇️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0211 -0.04   -0.0302 -0.0099 -0.0116]\n",
      " [-0.05   -0.014  -0.0233 -0.024  -0.0091]\n",
      " [-0.0178 -0.049   0.0286 -0.0192 -0.0066]\n",
      " [-0.0375  0.0424  0.241   0.024  -0.0137]\n",
      " [-0.0277 -0.0081  0.0588 -0.005  -0.0039]]\n",
      "mean_state_value -0.0010712149795885125\n",
      "episode 213/600\n",
      "p1 0.7712000000000001 p0 0.05719999999999996\n",
      "trajectorySteps 67\n",
      "[[10 13  1  1  0]\n",
      " [14  1  0  1  0]\n",
      " [18  1  0  1  2]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️⬇️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.021  -0.0399 -0.0301 -0.0099 -0.0116]\n",
      " [-0.0505 -0.014  -0.0232 -0.0239 -0.009 ]\n",
      " [-0.018  -0.0488  0.0287 -0.0192 -0.0067]\n",
      " [-0.0373  0.0425  0.2425  0.025  -0.0142]\n",
      " [-0.0276 -0.0081  0.0589 -0.005  -0.0039]]\n",
      "mean_state_value -0.0009720527322033937\n",
      "episode 214/600\n",
      "p1 0.7720000000000001 p0 0.05699999999999996\n",
      "trajectorySteps 71\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [30  1  2  0  0]\n",
      " [32  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️⬇️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪🔄\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.021  -0.0398 -0.03   -0.0099 -0.0115]\n",
      " [-0.0504 -0.014  -0.0232 -0.0238 -0.009 ]\n",
      " [-0.0179 -0.0486  0.0288 -0.0191 -0.0066]\n",
      " [-0.0378  0.0436  0.2444  0.0251 -0.0141]\n",
      " [-0.0279 -0.0081  0.059  -0.005  -0.0039]]\n",
      "mean_state_value -0.0008328618782490809\n",
      "episode 215/600\n",
      "p1 0.7728000000000002 p0 0.05679999999999996\n",
      "trajectorySteps 39\n",
      "[[ 4  4  1  1  0]\n",
      " [ 1  0  0  2  0]\n",
      " [ 4  0  1  4 14]\n",
      " [ 1  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪🔄\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0209 -0.0397 -0.0299 -0.0098 -0.0115]\n",
      " [-0.0502 -0.0139 -0.0231 -0.0238 -0.009 ]\n",
      " [-0.0179 -0.0485  0.0298 -0.0196 -0.0066]\n",
      " [-0.0377  0.0436  0.246   0.0251 -0.0141]\n",
      " [-0.0278 -0.008   0.0591 -0.0049 -0.0039]]\n",
      "mean_state_value -0.0006876604320002764\n",
      "episode 216/600\n",
      "p1 0.7736000000000002 p0 0.05659999999999996\n",
      "trajectorySteps 43\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 5  0  2  0  0]\n",
      " [32  1  1  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0209 -0.0395 -0.0298 -0.0098 -0.0115]\n",
      " [-0.05   -0.0139 -0.023  -0.0237 -0.009 ]\n",
      " [-0.0178 -0.0483  0.0299 -0.0195 -0.0066]\n",
      " [-0.0376  0.0437  0.2466  0.0252 -0.0141]\n",
      " [-0.0288 -0.0079  0.0597 -0.0049 -0.0039]]\n",
      "mean_state_value -0.0006191587459008356\n",
      "episode 217/600\n",
      "p1 0.7744000000000002 p0 0.056399999999999964\n",
      "trajectorySteps 15\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [2 0 2 0 0]\n",
      " [2 1 2 2 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0208 -0.0394 -0.0297 -0.0098 -0.0114]\n",
      " [-0.0499 -0.0139 -0.0229 -0.0236 -0.0089]\n",
      " [-0.0178 -0.0481  0.03   -0.0195 -0.0066]\n",
      " [-0.0377  0.0438  0.2481  0.0252 -0.014 ]\n",
      " [-0.0292 -0.0079  0.0608 -0.0048 -0.0039]]\n",
      "mean_state_value -0.0004828924341191295\n",
      "episode 218/600\n",
      "p1 0.7752000000000001 p0 0.05619999999999996\n",
      "trajectorySteps 38\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 9  0  0  0  0]\n",
      " [25  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0208 -0.0393 -0.0296 -0.0098 -0.0114]\n",
      " [-0.0497 -0.0138 -0.0228 -0.0236 -0.0089]\n",
      " [-0.0179 -0.048   0.03   -0.0194 -0.0066]\n",
      " [-0.0383  0.0449  0.25    0.0252 -0.014 ]\n",
      " [-0.0291 -0.0079  0.0608 -0.0048 -0.0038]]\n",
      "mean_state_value -0.00033564839345047546\n",
      "episode 219/600\n",
      "p1 0.7760000000000001 p0 0.05599999999999996\n",
      "trajectorySteps 34\n",
      "[[0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [6 1 0 0 0]\n",
      " [7 0 2 1 0]\n",
      " [9 1 3 2 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0207 -0.0392 -0.0295 -0.0097 -0.0114]\n",
      " [-0.0496 -0.0138 -0.0228 -0.0235 -0.0089]\n",
      " [-0.0178 -0.0478  0.0301 -0.0193 -0.0065]\n",
      " [-0.0382  0.045   0.252   0.0253 -0.0139]\n",
      " [-0.0297 -0.0079  0.0619 -0.0053 -0.0038]]\n",
      "mean_state_value -0.000206644949636441\n",
      "episode 220/600\n",
      "p1 0.7768000000000002 p0 0.05579999999999996\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [3 0 2 0 0]\n",
      " [3 1 1 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0207 -0.039  -0.0294 -0.0097 -0.0113]\n",
      " [-0.0494 -0.0137 -0.0227 -0.0234 -0.0089]\n",
      " [-0.0179 -0.0477  0.0302 -0.0193 -0.0065]\n",
      " [-0.0382  0.0451  0.2539  0.0253 -0.0139]\n",
      " [-0.0302 -0.0078  0.063  -0.0053 -0.0038]]\n",
      "mean_state_value -4.94358635539536e-05\n",
      "episode 221/600\n",
      "p1 0.7776000000000002 p0 0.05559999999999996\n",
      "trajectorySteps 30\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [10  0  2  0  0]\n",
      " [13  1  1  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0206 -0.0389 -0.0293 -0.0097 -0.0113]\n",
      " [-0.0492 -0.0137 -0.0226 -0.0233 -0.0088]\n",
      " [-0.0178 -0.0475  0.0302 -0.0192 -0.0065]\n",
      " [-0.0381  0.0452  0.2556  0.0254 -0.0138]\n",
      " [-0.0307 -0.0077  0.064  -0.0053 -0.0038]]\n",
      "mean_state_value 9.84228189741287e-05\n",
      "episode 222/600\n",
      "p1 0.7784000000000002 p0 0.05539999999999996\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [5 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0206 -0.0388 -0.0292 -0.0097 -0.0112]\n",
      " [-0.0491 -0.0137 -0.0225 -0.0232 -0.0088]\n",
      " [-0.0179 -0.0473  0.0303 -0.0191 -0.0065]\n",
      " [-0.0385  0.0458  0.2562  0.0254 -0.0138]\n",
      " [-0.0306 -0.0076  0.0641 -0.0053 -0.0038]]\n",
      "mean_state_value 0.00018225146437738233\n",
      "episode 223/600\n",
      "p1 0.7792000000000001 p0 0.05519999999999996\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 3 0 0 0]\n",
      " [0 2 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0386 -0.0291 -0.0096 -0.0112]\n",
      " [-0.0489 -0.0136 -0.0224 -0.0232 -0.0088]\n",
      " [-0.0179 -0.0482  0.0304 -0.0191 -0.0065]\n",
      " [-0.0384  0.0464  0.2568  0.0254 -0.0137]\n",
      " [-0.0305 -0.0076  0.0642 -0.0052 -0.0038]]\n",
      "mean_state_value 0.0002504747338002728\n",
      "episode 224/600\n",
      "p1 0.7800000000000001 p0 0.05499999999999996\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 3 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️➡️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0385 -0.029  -0.0096 -0.0112]\n",
      " [-0.0488 -0.0136 -0.0224 -0.0231 -0.0088]\n",
      " [-0.018  -0.0486  0.0315 -0.019  -0.0065]\n",
      " [-0.0383  0.0465  0.2585  0.0255 -0.0137]\n",
      " [-0.0304 -0.0076  0.0643 -0.0052 -0.0038]]\n",
      "mean_state_value 0.00039488300411283405\n",
      "episode 225/600\n",
      "p1 0.7808000000000002 p0 0.05479999999999996\n",
      "trajectorySteps 174\n",
      "[[17 17  4  2  3]\n",
      " [ 3  0  2  5 29]\n",
      " [52  2  0  2 18]\n",
      " [ 7  1  2  0  2]\n",
      " [ 3  0  1  1  1]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏫️⬆️🔄\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0204 -0.0385 -0.0295 -0.0096 -0.0111]\n",
      " [-0.0486 -0.0135 -0.0223 -0.0235 -0.0087]\n",
      " [-0.0183 -0.049   0.0315 -0.0189 -0.0064]\n",
      " [-0.0383  0.0466  0.2604  0.0255 -0.0136]\n",
      " [-0.0304 -0.0075  0.0653 -0.0051 -0.0038]]\n",
      "mean_state_value 0.0004838089209272933\n",
      "episode 226/600\n",
      "p1 0.7816000000000002 p0 0.05459999999999996\n",
      "trajectorySteps 239\n",
      "[[37 31  4  1  1]\n",
      " [62  8  2  0  1]\n",
      " [61 11  0  0  2]\n",
      " [ 6  0  2  0  5]\n",
      " [ 1  0  1  1  2]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0208 -0.0397 -0.03   -0.0096 -0.0111]\n",
      " [-0.0509 -0.0141 -0.0228 -0.0234 -0.0087]\n",
      " [-0.0186 -0.0494  0.0316 -0.0189 -0.0064]\n",
      " [-0.0382  0.0467  0.262   0.0255 -0.0136]\n",
      " [-0.0303 -0.0075  0.0664 -0.005  -0.0038]]\n",
      "mean_state_value 0.000379097829208805\n",
      "episode 227/600\n",
      "p1 0.7824000000000002 p0 0.05439999999999996\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 1 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0207 -0.0396 -0.0299 -0.0095 -0.0111]\n",
      " [-0.0508 -0.014  -0.0227 -0.0234 -0.0087]\n",
      " [-0.0187 -0.0492  0.0317 -0.0188 -0.0064]\n",
      " [-0.0386  0.0478  0.2636  0.0256 -0.0135]\n",
      " [-0.0302 -0.0075  0.0665 -0.005  -0.0038]]\n",
      "mean_state_value 0.0005244958505459629\n",
      "episode 228/600\n",
      "p1 0.7832000000000001 p0 0.05419999999999996\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [12 11  0  0  0]\n",
      " [ 0  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0207 -0.0395 -0.0298 -0.0095 -0.011 ]\n",
      " [-0.0506 -0.014  -0.0227 -0.0233 -0.0087]\n",
      " [-0.0187 -0.0501  0.0317 -0.0187 -0.0064]\n",
      " [-0.0384  0.0489  0.2656  0.0256 -0.0135]\n",
      " [-0.0301 -0.0074  0.0666 -0.005  -0.0038]]\n",
      "mean_state_value 0.0006609741590712157\n",
      "episode 229/600\n",
      "p1 0.7840000000000001 p0 0.05399999999999996\n",
      "trajectorySteps 195\n",
      "[[  0   0   0   0   0]\n",
      " [  9   0   0   0   0]\n",
      " [155   7   0   0   0]\n",
      " [ 21   1   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0206 -0.0393 -0.0297 -0.0095 -0.011 ]\n",
      " [-0.0505 -0.0139 -0.0226 -0.0232 -0.0086]\n",
      " [-0.0192 -0.05    0.0318 -0.0187 -0.0064]\n",
      " [-0.0389  0.05    0.2675  0.0257 -0.0134]\n",
      " [-0.03   -0.0074  0.0667 -0.005  -0.0038]]\n",
      "mean_state_value 0.0007978403416898294\n",
      "episode 230/600\n",
      "p1 0.7848000000000002 p0 0.05379999999999996\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 3 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0206 -0.0392 -0.0296 -0.0095 -0.011 ]\n",
      " [-0.0504 -0.0139 -0.0225 -0.0231 -0.0086]\n",
      " [-0.0194 -0.0504  0.0329 -0.0186 -0.0063]\n",
      " [-0.0387  0.0501  0.2695  0.0257 -0.0134]\n",
      " [-0.0299 -0.0074  0.0667 -0.0049 -0.0038]]\n",
      "mean_state_value 0.0009515758188119564\n",
      "episode 231/600\n",
      "p1 0.7856000000000002 p0 0.05359999999999996\n",
      "trajectorySteps 24\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [11  8  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0391 -0.0295 -0.0094 -0.0109]\n",
      " [-0.0502 -0.0138 -0.0224 -0.023  -0.0086]\n",
      " [-0.0197 -0.0507  0.033  -0.0185 -0.0063]\n",
      " [-0.0386  0.0508  0.2701  0.0257 -0.0133]\n",
      " [-0.0298 -0.0073  0.0668 -0.0049 -0.0037]]\n",
      "mean_state_value 0.0010356600067133366\n",
      "episode 232/600\n",
      "p1 0.7864000000000002 p0 0.05339999999999996\n",
      "trajectorySteps 63\n",
      "[[32  4  0  0  0]\n",
      " [ 4  3  0  0  0]\n",
      " [ 9  8  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0208 -0.0389 -0.0294 -0.0094 -0.0109]\n",
      " [-0.0501 -0.0138 -0.0223 -0.0229 -0.0086]\n",
      " [-0.0197 -0.0526  0.033  -0.0185 -0.0063]\n",
      " [-0.0385  0.0515  0.2708  0.0258 -0.0133]\n",
      " [-0.0297 -0.0073  0.0669 -0.0049 -0.0037]]\n",
      "mean_state_value 0.0010547537371957143\n",
      "episode 233/600\n",
      "p1 0.7872000000000001 p0 0.053199999999999956\n",
      "trajectorySteps 18\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [8 1 0 0 0]\n",
      " [6 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0207 -0.0388 -0.0293 -0.0094 -0.0109]\n",
      " [-0.0499 -0.0138 -0.0223 -0.0229 -0.0085]\n",
      " [-0.0199 -0.0525  0.0331 -0.0184 -0.0063]\n",
      " [-0.0389  0.0526  0.2728  0.0258 -0.0132]\n",
      " [-0.0297 -0.0073  0.067  -0.0049 -0.0037]]\n",
      "mean_state_value 0.0012088905715182375\n",
      "episode 234/600\n",
      "p1 0.7880000000000001 p0 0.05299999999999996\n",
      "trajectorySteps 113\n",
      "[[  0   0   0   0   0]\n",
      " [  1   0   0   0   0]\n",
      " [103   4   1   0   0]\n",
      " [  2   0   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0207 -0.0387 -0.0292 -0.0094 -0.0108]\n",
      " [-0.0497 -0.0137 -0.0222 -0.0228 -0.0085]\n",
      " [-0.0203 -0.0528  0.0342 -0.0183 -0.0063]\n",
      " [-0.0388  0.0527  0.2747  0.0259 -0.0132]\n",
      " [-0.0296 -0.0073  0.0671 -0.0048 -0.0037]]\n",
      "mean_state_value 0.001357784480582746\n",
      "episode 235/600\n",
      "p1 0.7888000000000002 p0 0.05279999999999996\n",
      "trajectorySteps 19\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [7 3 0 0 0]\n",
      " [3 0 2 0 0]\n",
      " [1 1 1 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0206 -0.0386 -0.0291 -0.0093 -0.0108]\n",
      " [-0.0496 -0.0137 -0.0221 -0.0227 -0.0085]\n",
      " [-0.0202 -0.0526  0.0343 -0.0183 -0.0062]\n",
      " [-0.0387  0.0528  0.2767  0.0259 -0.0131]\n",
      " [-0.03   -0.0072  0.0682 -0.0048 -0.0037]]\n",
      "mean_state_value 0.0015263693025199623\n",
      "episode 236/600\n",
      "p1 0.7896000000000002 p0 0.05259999999999996\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [11  1  0  0  0]\n",
      " [11  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0206 -0.0384 -0.029  -0.0093 -0.0108]\n",
      " [-0.0494 -0.0136 -0.022  -0.0226 -0.0085]\n",
      " [-0.0206 -0.0524  0.0344 -0.0182 -0.0062]\n",
      " [-0.0396  0.0534  0.2774  0.0259 -0.0131]\n",
      " [-0.0299 -0.0071  0.0682 -0.0048 -0.0037]]\n",
      "mean_state_value 0.001585329481512423\n",
      "episode 237/600\n",
      "p1 0.7904000000000002 p0 0.05239999999999996\n",
      "trajectorySteps 72\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [58  6  2  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0383 -0.0289 -0.0093 -0.0107]\n",
      " [-0.0492 -0.0136 -0.0219 -0.0225 -0.0084]\n",
      " [-0.0212 -0.0528  0.0345 -0.0181 -0.0062]\n",
      " [-0.0395  0.0535  0.2781  0.026  -0.013 ]\n",
      " [-0.0298 -0.0071  0.0683 -0.0048 -0.0037]]\n",
      "mean_state_value 0.0016375279448541606\n",
      "episode 238/600\n",
      "p1 0.7912000000000001 p0 0.052199999999999955\n",
      "trajectorySteps 18\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [8 5 1 0 0]\n",
      " [2 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0382 -0.0288 -0.0093 -0.0107]\n",
      " [-0.0491 -0.0135 -0.0219 -0.0225 -0.0084]\n",
      " [-0.0213 -0.0531  0.0356 -0.0181 -0.0062]\n",
      " [-0.0393  0.0536  0.2801  0.026  -0.013 ]\n",
      " [-0.0297 -0.0071  0.0684 -0.0047 -0.0037]]\n",
      "mean_state_value 0.0017991464079760316\n",
      "episode 239/600\n",
      "p1 0.7920000000000001 p0 0.051999999999999956\n",
      "trajectorySteps 58\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [26  1  0  0  0]\n",
      " [24  1  2  0  0]\n",
      " [ 3  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0204 -0.038  -0.0287 -0.0092 -0.0107]\n",
      " [-0.0489 -0.0135 -0.0218 -0.0224 -0.0084]\n",
      " [-0.0214 -0.0529  0.0357 -0.018  -0.0062]\n",
      " [-0.0399  0.0548  0.2821  0.0261 -0.0129]\n",
      " [-0.0296 -0.007   0.0685 -0.0047 -0.0036]]\n",
      "mean_state_value 0.0019543217199004307\n",
      "episode 240/600\n",
      "p1 0.7928000000000002 p0 0.05179999999999996\n",
      "trajectorySteps 48\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [37  0  2  0  0]\n",
      " [ 5  1  1  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0204 -0.0379 -0.0286 -0.0092 -0.0106]\n",
      " [-0.0487 -0.0135 -0.0217 -0.0223 -0.0084]\n",
      " [-0.0214 -0.0527  0.0358 -0.0179 -0.0062]\n",
      " [-0.04    0.0549  0.2828  0.0261 -0.0129]\n",
      " [-0.03   -0.0069  0.0692 -0.0047 -0.0036]]\n",
      "mean_state_value 0.002049018679394682\n",
      "episode 241/600\n",
      "p1 0.7936000000000002 p0 0.05159999999999996\n",
      "trajectorySteps 21\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [14  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0203 -0.0378 -0.0285 -0.0092 -0.0106]\n",
      " [-0.0485 -0.0134 -0.0216 -0.0222 -0.0083]\n",
      " [-0.0214 -0.0526  0.0359 -0.0179 -0.0061]\n",
      " [-0.0405  0.0555  0.2848  0.0261 -0.0128]\n",
      " [-0.0299 -0.0069  0.0693 -0.0047 -0.0036]]\n",
      "mean_state_value 0.0021936938934757014\n",
      "episode 242/600\n",
      "p1 0.7944000000000002 p0 0.05139999999999996\n",
      "trajectorySteps 42\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [19  2  0  0  0]\n",
      " [16  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0203 -0.0376 -0.0283 -0.0092 -0.0105]\n",
      " [-0.0484 -0.0134 -0.0215 -0.0221 -0.0083]\n",
      " [-0.0214 -0.0524  0.0359 -0.0178 -0.0061]\n",
      " [-0.0411  0.0566  0.2868  0.0262 -0.0128]\n",
      " [-0.0298 -0.0069  0.0694 -0.0046 -0.0036]]\n",
      "mean_state_value 0.0023498190559888346\n",
      "episode 243/600\n",
      "p1 0.7952000000000001 p0 0.051199999999999954\n",
      "trajectorySteps 24\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [11 10  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0202 -0.0375 -0.0282 -0.0091 -0.0105]\n",
      " [-0.0482 -0.0133 -0.0214 -0.022  -0.0083]\n",
      " [-0.0215 -0.0527  0.0371 -0.0177 -0.0061]\n",
      " [-0.0409  0.0567  0.2883  0.0262 -0.0127]\n",
      " [-0.0297 -0.0068  0.0694 -0.0046 -0.0036]]\n",
      "mean_state_value 0.0024995044872218874\n",
      "episode 244/600\n",
      "p1 0.7960000000000002 p0 0.050999999999999955\n",
      "trajectorySteps 137\n",
      "[[73  2  0  0  0]\n",
      " [28  1  0  0  0]\n",
      " [28  2  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬆️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.021  -0.0374 -0.0281 -0.0091 -0.0105]\n",
      " [-0.0487 -0.0133 -0.0214 -0.022  -0.0082]\n",
      " [-0.0217 -0.0532  0.0371 -0.0177 -0.0061]\n",
      " [-0.0408  0.0579  0.29    0.0263 -0.0127]\n",
      " [-0.0296 -0.0068  0.0695 -0.0046 -0.0036]]\n",
      "mean_state_value 0.0025813798599553382\n",
      "episode 245/600\n",
      "p1 0.7968000000000002 p0 0.050799999999999956\n",
      "trajectorySteps 103\n",
      "[[10  8  1  3  0]\n",
      " [ 1  0  1  4 14]\n",
      " [25  3  0  1 13]\n",
      " [ 2  0  2  0 11]\n",
      " [ 0  0  2  1  1]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬆️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.021  -0.0373 -0.0285 -0.0091 -0.0104]\n",
      " [-0.0485 -0.0132 -0.0213 -0.0219 -0.0085]\n",
      " [-0.0217 -0.053   0.0372 -0.0176 -0.0061]\n",
      " [-0.0406  0.058   0.2921  0.0263 -0.0126]\n",
      " [-0.0295 -0.0068  0.0706 -0.0046 -0.0036]]\n",
      "mean_state_value 0.002735196932791442\n",
      "episode 246/600\n",
      "p1 0.7976000000000002 p0 0.05059999999999996\n",
      "trajectorySteps 167\n",
      "[[40 39  1  2 22]\n",
      " [ 4  2  0  3 24]\n",
      " [ 4  2  0  1 14]\n",
      " [ 0  0  2  0  3]\n",
      " [ 0  0  2  1  1]]\n",
      "🔄⬅️➡️⬇️🔄\n",
      "⬇️⏫️⏩️➡️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0214 -0.0386 -0.0284 -0.0091 -0.0107]\n",
      " [-0.0484 -0.0133 -0.0212 -0.0218 -0.0086]\n",
      " [-0.0217 -0.0528  0.0373 -0.0175 -0.0062]\n",
      " [-0.0405  0.0581  0.2937  0.0263 -0.0127]\n",
      " [-0.0294 -0.0067  0.0717 -0.0046 -0.0035]]\n",
      "mean_state_value 0.0028039227591918382\n",
      "episode 247/600\n",
      "p1 0.7984000000000002 p0 0.05039999999999996\n",
      "trajectorySteps 15\n",
      "[[2 1 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [4 3 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️🔄\n",
      "⬇️⏫️⏩️➡️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0214 -0.0385 -0.0283 -0.009  -0.0106]\n",
      " [-0.0482 -0.0132 -0.0211 -0.0217 -0.0086]\n",
      " [-0.0218 -0.0536  0.0384 -0.0175 -0.0061]\n",
      " [-0.0404  0.0582  0.2954  0.0264 -0.0126]\n",
      " [-0.0293 -0.0067  0.0718 -0.0045 -0.0035]]\n",
      "mean_state_value 0.002935895269374258\n",
      "episode 248/600\n",
      "p1 0.7992000000000001 p0 0.05019999999999995\n",
      "trajectorySteps 117\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [98  4  0  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️🔄\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0213 -0.0383 -0.0282 -0.009  -0.0106]\n",
      " [-0.0481 -0.0132 -0.021  -0.0216 -0.0085]\n",
      " [-0.022  -0.0539  0.0385 -0.0174 -0.0061]\n",
      " [-0.0404  0.0593  0.2974  0.0264 -0.0126]\n",
      " [-0.0292 -0.0067  0.0718 -0.0045 -0.0035]]\n",
      "mean_state_value 0.003090961245300521\n",
      "episode 249/600\n",
      "p1 0.8000000000000002 p0 0.049999999999999954\n",
      "trajectorySteps 154\n",
      "[[128   7   0   0   0]\n",
      " [ 11   0   0   0   0]\n",
      " [  2   0   0   0   0]\n",
      " [  3   1   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "➡️⬅️➡️⬇️🔄\n",
      "⬇️⏫️⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0225 -0.0382 -0.0281 -0.009  -0.0105]\n",
      " [-0.0483 -0.0131 -0.021  -0.0215 -0.0085]\n",
      " [-0.022  -0.0537  0.0386 -0.0173 -0.0061]\n",
      " [-0.0408  0.0605  0.2994  0.0264 -0.0125]\n",
      " [-0.0291 -0.0066  0.0719 -0.0045 -0.0035]]\n",
      "mean_state_value 0.0031983520125122237\n",
      "episode 250/600\n",
      "p1 0.8008000000000002 p0 0.049799999999999955\n",
      "trajectorySteps 192\n",
      "[[10 10  1  4 42]\n",
      " [ 4  1  0  6 44]\n",
      " [25  2  0  0  8]\n",
      " [24  0  2  1  5]\n",
      " [ 3  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬅️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0224 -0.0381 -0.028  -0.009  -0.0108]\n",
      " [-0.0486 -0.0131 -0.0209 -0.0215 -0.0085]\n",
      " [-0.0221 -0.0535  0.0387 -0.0173 -0.0062]\n",
      " [-0.0408  0.0606  0.3002  0.0272 -0.013 ]\n",
      " [-0.029  -0.0066  0.072  -0.0045 -0.0035]]\n",
      "mean_state_value 0.0032518240979858433\n",
      "episode 251/600\n",
      "p1 0.8016000000000002 p0 0.04959999999999996\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [4 1 2 0 0]\n",
      " [3 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬅️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0224 -0.038  -0.0279 -0.009  -0.0108]\n",
      " [-0.0484 -0.013  -0.0208 -0.0214 -0.0085]\n",
      " [-0.0221 -0.0533  0.0387 -0.0172 -0.0061]\n",
      " [-0.0412  0.0618  0.3022  0.0272 -0.0129]\n",
      " [-0.0292 -0.0066  0.0721 -0.0044 -0.0035]]\n",
      "mean_state_value 0.0034143350089546076\n",
      "episode 252/600\n",
      "p1 0.8024000000000002 p0 0.04939999999999996\n",
      "trajectorySteps 37\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [28  2  2  0  0]]\n",
      "➡️⬅️➡️⬇️⬅️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0223 -0.0378 -0.0278 -0.009  -0.0107]\n",
      " [-0.0483 -0.013  -0.0207 -0.0213 -0.0084]\n",
      " [-0.0221 -0.0531  0.0388 -0.0171 -0.0061]\n",
      " [-0.041   0.0619  0.3042  0.0272 -0.0129]\n",
      " [-0.0298 -0.0065  0.0727 -0.0044 -0.0035]]\n",
      "mean_state_value 0.0035615406700418547\n",
      "episode 253/600\n",
      "p1 0.8032000000000001 p0 0.04919999999999995\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [2 0 2 0 0]\n",
      " [2 1 1 0 0]]\n",
      "➡️⬅️➡️⬇️⬅️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0223 -0.0377 -0.0277 -0.0089 -0.0107]\n",
      " [-0.0481 -0.013  -0.0206 -0.0212 -0.0084]\n",
      " [-0.0221 -0.0529  0.0389 -0.0171 -0.0061]\n",
      " [-0.0411  0.062   0.3063  0.0273 -0.0128]\n",
      " [-0.0302 -0.0064  0.0739 -0.0044 -0.0035]]\n",
      "mean_state_value 0.0037285264492828945\n",
      "episode 254/600\n",
      "p1 0.8040000000000002 p0 0.04899999999999995\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 0 0 0 0]\n",
      " [4 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬅️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.0376 -0.0276 -0.0089 -0.0106]\n",
      " [-0.0479 -0.0129 -0.0206 -0.0211 -0.0084]\n",
      " [-0.0224 -0.0527  0.039  -0.017  -0.0061]\n",
      " [-0.0414  0.0631  0.3083  0.0273 -0.0128]\n",
      " [-0.0301 -0.0064  0.074  -0.0044 -0.0035]]\n",
      "mean_state_value 0.0038922992800324687\n",
      "episode 255/600\n",
      "p1 0.8048000000000002 p0 0.048799999999999955\n",
      "trajectorySteps 23\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [18  0  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬅️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "➡️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.0374 -0.0275 -0.0089 -0.0106]\n",
      " [-0.0477 -0.0129 -0.0205 -0.0211 -0.0084]\n",
      " [-0.0227 -0.0525  0.0391 -0.0169 -0.0061]\n",
      " [-0.0417  0.0643  0.3104  0.0274 -0.0127]\n",
      " [-0.03   -0.0064  0.0741 -0.0043 -0.0034]]\n",
      "mean_state_value 0.00405457746212677\n",
      "episode 256/600\n",
      "p1 0.8056000000000002 p0 0.048599999999999956\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [4 4 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬅️\n",
      "⬇️⏫️⏩️⬆️🔄\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0373 -0.0274 -0.0089 -0.0106]\n",
      " [-0.0476 -0.0128 -0.0204 -0.021  -0.0083]\n",
      " [-0.0227 -0.0528  0.0402 -0.0169 -0.0061]\n",
      " [-0.0416  0.0644  0.3119  0.0274 -0.0127]\n",
      " [-0.0299 -0.0063  0.0742 -0.0043 -0.0034]]\n",
      "mean_state_value 0.004208445062136012\n",
      "episode 257/600\n",
      "p1 0.8064000000000002 p0 0.04839999999999996\n",
      "trajectorySteps 341\n",
      "[[  0   0   0   0   0]\n",
      " [ 11   0   0   0   0]\n",
      " [282  16   0   0   0]\n",
      " [ 25   1   2   0   0]\n",
      " [  2   1   1   0   0]]\n",
      "➡️⬅️➡️⬇️⬅️\n",
      "⬆️⏫️⏩️⬆️🔄\n",
      "⬆️🔄⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0372 -0.0273 -0.0088 -0.0105]\n",
      " [-0.0474 -0.0128 -0.0203 -0.0209 -0.0083]\n",
      " [-0.0236 -0.0526  0.0403 -0.0168 -0.006 ]\n",
      " [-0.042   0.064   0.3132  0.0274 -0.0126]\n",
      " [-0.0298 -0.0062  0.075  -0.0043 -0.0034]]\n",
      "mean_state_value 0.00428288610032597\n",
      "episode 258/600\n",
      "p1 0.8072000000000001 p0 0.04819999999999995\n",
      "trajectorySteps 137\n",
      "[[27 24  3 25  1]\n",
      " [ 3  1  0 20  1]\n",
      " [ 1  0  1 11 15]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬅️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "➡️🔄⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.0373 -0.0272 -0.0088 -0.0105]\n",
      " [-0.0479 -0.0127 -0.0202 -0.0208 -0.0083]\n",
      " [-0.0236 -0.0524  0.0415 -0.0172 -0.0061]\n",
      " [-0.0418  0.0641  0.3152  0.0275 -0.0126]\n",
      " [-0.0297 -0.0062  0.0751 -0.0043 -0.0034]]\n",
      "mean_state_value 0.004405753144828437\n",
      "episode 259/600\n",
      "p1 0.8080000000000002 p0 0.04799999999999995\n",
      "trajectorySteps 69\n",
      "[[ 1  1  3  8  5]\n",
      " [ 1  0  0  4 38]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬅️\n",
      "⬆️⏫️⏩️➡️⬆️\n",
      "➡️🔄⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0371 -0.0272 -0.0088 -0.0105]\n",
      " [-0.0477 -0.0127 -0.0201 -0.0207 -0.0086]\n",
      " [-0.0236 -0.0522  0.0416 -0.0172 -0.0061]\n",
      " [-0.0417  0.0643  0.3173  0.0275 -0.0126]\n",
      " [-0.0296 -0.0062  0.0762 -0.0042 -0.0034]]\n",
      "mean_state_value 0.0045868511655239586\n",
      "episode 260/600\n",
      "p1 0.8088000000000002 p0 0.047799999999999954\n",
      "trajectorySteps 17\n",
      "[[1 1 2 3 0]\n",
      " [2 0 0 2 0]\n",
      " [1 0 0 2 0]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬅️\n",
      "⬆️⏫️⏩️➡️⬆️\n",
      "➡️🔄⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.037  -0.0271 -0.0088 -0.0104]\n",
      " [-0.0476 -0.0127 -0.0201 -0.0207 -0.0086]\n",
      " [-0.0235 -0.052   0.0416 -0.0177 -0.0061]\n",
      " [-0.0415  0.0644  0.3194  0.0287 -0.0125]\n",
      " [-0.0295 -0.0061  0.0763 -0.0041 -0.0034]]\n",
      "mean_state_value 0.004762337552774747\n",
      "episode 261/600\n",
      "p1 0.8096000000000002 p0 0.047599999999999955\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 3 0]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬅️\n",
      "⬇️⏫️⏩️➡️⬆️\n",
      "➡️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.022  -0.0369 -0.027  -0.0088 -0.0104]\n",
      " [-0.0474 -0.0126 -0.02   -0.0206 -0.0085]\n",
      " [-0.0235 -0.0518  0.0417 -0.0181 -0.0061]\n",
      " [-0.0414  0.0645  0.3214  0.0298 -0.0125]\n",
      " [-0.0294 -0.0061  0.0764 -0.0041 -0.0034]]\n",
      "mean_state_value 0.004943022743531911\n",
      "episode 262/600\n",
      "p1 0.8104000000000002 p0 0.047399999999999956\n",
      "trajectorySteps 230\n",
      "[[ 0  2  4 55 49]\n",
      " [ 0  1  1 52 50]\n",
      " [ 3  5  1  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0219 -0.0367 -0.0269 -0.0091 -0.0105]\n",
      " [-0.0472 -0.0126 -0.0199 -0.021  -0.0087]\n",
      " [-0.024  -0.0525  0.0418 -0.018  -0.0061]\n",
      " [-0.0412  0.0646  0.3235  0.0299 -0.0124]\n",
      " [-0.0293 -0.0061  0.0776 -0.004  -0.0033]]\n",
      "mean_state_value 0.00503304086430723\n",
      "episode 263/600\n",
      "p1 0.8112000000000001 p0 0.04719999999999995\n",
      "trajectorySteps 34\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3 12  1  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [ 6  0  0  0  0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0219 -0.0366 -0.0268 -0.0091 -0.0105]\n",
      " [-0.047  -0.0125 -0.0198 -0.021  -0.0087]\n",
      " [-0.0239 -0.0528  0.043  -0.018  -0.006 ]\n",
      " [-0.0416  0.0647  0.3256  0.0299 -0.0124]\n",
      " [-0.0293 -0.006   0.0777 -0.004  -0.0033]]\n",
      "mean_state_value 0.005183734653127943\n",
      "episode 264/600\n",
      "p1 0.8120000000000002 p0 0.04699999999999995\n",
      "trajectorySteps 41\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [18 14  1  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0218 -0.0364 -0.0267 -0.0091 -0.0105]\n",
      " [-0.0469 -0.0125 -0.0197 -0.0209 -0.0087]\n",
      " [-0.024  -0.0532  0.0442 -0.0179 -0.006 ]\n",
      " [-0.0415  0.0648  0.3276  0.03   -0.0123]\n",
      " [-0.0292 -0.006   0.0778 -0.004  -0.0033]]\n",
      "mean_state_value 0.005357283370869114\n",
      "episode 265/600\n",
      "p1 0.8128000000000002 p0 0.04679999999999995\n",
      "trajectorySteps 18\n",
      "[[2 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [3 1 0 1 1]\n",
      " [0 0 2 1 2]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0218 -0.0363 -0.0266 -0.009  -0.0104]\n",
      " [-0.0467 -0.0124 -0.0197 -0.0208 -0.0087]\n",
      " [-0.0239 -0.053   0.0442 -0.0178 -0.006 ]\n",
      " [-0.0413  0.0649  0.3297  0.0311 -0.0128]\n",
      " [-0.0291 -0.006   0.0778 -0.0039 -0.0033]]\n",
      "mean_state_value 0.005535444263091407\n",
      "episode 266/600\n",
      "p1 0.8136000000000002 p0 0.046599999999999954\n",
      "trajectorySteps 24\n",
      "[[0 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [5 0 0 0 0]\n",
      " [6 0 2 0 0]\n",
      " [5 1 1 0 0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "🔄⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0218 -0.0362 -0.0264 -0.009  -0.0104]\n",
      " [-0.0465 -0.0124 -0.0196 -0.0207 -0.0086]\n",
      " [-0.0238 -0.0528  0.0443 -0.0177 -0.006 ]\n",
      " [-0.0414  0.065   0.3318  0.0312 -0.0127]\n",
      " [-0.0295 -0.0059  0.079  -0.0039 -0.0033]]\n",
      "mean_state_value 0.005709002219661634\n",
      "episode 267/600\n",
      "p1 0.8144000000000002 p0 0.046399999999999955\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0217 -0.036  -0.0263 -0.009  -0.0104]\n",
      " [-0.0468 -0.0124 -0.0195 -0.0206 -0.0086]\n",
      " [-0.0238 -0.053   0.0444 -0.0177 -0.006 ]\n",
      " [-0.0412  0.0662  0.3339  0.0312 -0.0127]\n",
      " [-0.0294 -0.0058  0.0791 -0.0039 -0.0033]]\n",
      "mean_state_value 0.005871674246061565\n",
      "episode 268/600\n",
      "p1 0.8152000000000001 p0 0.04619999999999995\n",
      "trajectorySteps 63\n",
      "[[ 1  1  1  1  0]\n",
      " [20  0  0  1  0]\n",
      " [23  1  0  2  8]\n",
      " [ 1  0  2  1  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️🔄\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0217 -0.0359 -0.0262 -0.0089 -0.0103]\n",
      " [-0.0467 -0.0123 -0.0194 -0.0205 -0.0086]\n",
      " [-0.0236 -0.0528  0.0445 -0.018  -0.006 ]\n",
      " [-0.0411  0.0663  0.336   0.0324 -0.0126]\n",
      " [-0.0293 -0.0058  0.0792 -0.0039 -0.0033]]\n",
      "mean_state_value 0.006055897543783866\n",
      "episode 269/600\n",
      "p1 0.8160000000000002 p0 0.04599999999999995\n",
      "trajectorySteps 54\n",
      "[[ 2  1  1  0  0]\n",
      " [ 1  0  1  3  1]\n",
      " [ 2  1  1  6 32]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️🔄\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0216 -0.0358 -0.0266 -0.0089 -0.0103]\n",
      " [-0.0466 -0.0123 -0.0193 -0.0204 -0.0086]\n",
      " [-0.0236 -0.0526  0.0452 -0.0185 -0.006 ]\n",
      " [-0.0409  0.0664  0.3368  0.0324 -0.0126]\n",
      " [-0.0292 -0.0058  0.0793 -0.0038 -0.0033]]\n",
      "mean_state_value 0.006140099863338372\n",
      "episode 270/600\n",
      "p1 0.8168000000000002 p0 0.04579999999999995\n",
      "trajectorySteps 200\n",
      "[[49 51  2  2  0]\n",
      " [32  7  1  3  1]\n",
      " [36  6  0  0  1]\n",
      " [ 2  0  2  0  2]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️🔄\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0223 -0.0374 -0.0265 -0.009  -0.0103]\n",
      " [-0.0474 -0.0128 -0.0193 -0.0208 -0.0085]\n",
      " [-0.0239 -0.0528  0.0453 -0.0184 -0.006 ]\n",
      " [-0.0408  0.0665  0.3385  0.0325 -0.0125]\n",
      " [-0.0291 -0.0057  0.0805 -0.0037 -0.0032]]\n",
      "mean_state_value 0.006115715376474315\n",
      "episode 271/600\n",
      "p1 0.8176000000000002 p0 0.04559999999999995\n",
      "trajectorySteps 58\n",
      "[[ 0  2  2  3 40]\n",
      " [ 1  1  0  0  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.0373 -0.0264 -0.0091 -0.0106]\n",
      " [-0.0477 -0.0127 -0.0192 -0.0207 -0.0085]\n",
      " [-0.0239 -0.0526  0.0454 -0.0183 -0.006 ]\n",
      " [-0.0407  0.0667  0.3406  0.0325 -0.0125]\n",
      " [-0.029  -0.0057  0.0817 -0.0036 -0.0032]]\n",
      "mean_state_value 0.006284897448511945\n",
      "episode 272/600\n",
      "p1 0.8184000000000002 p0 0.045399999999999954\n",
      "trajectorySteps 54\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [27  2  0  0  0]\n",
      " [22  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.0371 -0.0263 -0.009  -0.0106]\n",
      " [-0.0475 -0.0127 -0.0191 -0.0206 -0.0085]\n",
      " [-0.024  -0.0524  0.0455 -0.0183 -0.006 ]\n",
      " [-0.041   0.0679  0.3422  0.0326 -0.0124]\n",
      " [-0.0289 -0.0057  0.0818 -0.0036 -0.0032]]\n",
      "mean_state_value 0.0064393235548329385\n",
      "episode 273/600\n",
      "p1 0.8192000000000002 p0 0.04519999999999995\n",
      "trajectorySteps 49\n",
      "[[22  2  1  1  1]\n",
      " [ 1  1  0  0  2]\n",
      " [ 4  3  0  0  2]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  1  2  1  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.037  -0.0262 -0.009  -0.0105]\n",
      " [-0.0473 -0.0126 -0.019  -0.0205 -0.0085]\n",
      " [-0.0241 -0.0527  0.0455 -0.0182 -0.006 ]\n",
      " [-0.0409  0.068   0.3443  0.0326 -0.0123]\n",
      " [-0.0288 -0.0055  0.0825 -0.0036 -0.0032]]\n",
      "mean_state_value 0.006595032363438629\n",
      "episode 274/600\n",
      "p1 0.8200000000000002 p0 0.04499999999999995\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 2 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0368 -0.0261 -0.009  -0.0105]\n",
      " [-0.0471 -0.0126 -0.0189 -0.0205 -0.0085]\n",
      " [-0.0242 -0.0529  0.0456 -0.0181 -0.006 ]\n",
      " [-0.0407  0.0692  0.3465  0.0327 -0.0123]\n",
      " [-0.0287 -0.0055  0.0826 -0.0036 -0.0032]]\n",
      "mean_state_value 0.006776739388661417\n",
      "episode 275/600\n",
      "p1 0.8208000000000002 p0 0.04479999999999995\n",
      "trajectorySteps 68\n",
      "[[18  1  2  2  1]\n",
      " [ 2  0  0  0  3]\n",
      " [14  1  0  0  2]\n",
      " [12  0  2  0  2]\n",
      " [ 0  1  2  2  1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0367 -0.0259 -0.0089 -0.0105]\n",
      " [-0.0469 -0.0125 -0.0188 -0.0204 -0.0085]\n",
      " [-0.0241 -0.0527  0.0457 -0.018  -0.006 ]\n",
      " [-0.0406  0.0693  0.3486  0.0327 -0.0122]\n",
      " [-0.0286 -0.0054  0.0834 -0.0037 -0.0032]]\n",
      "mean_state_value 0.006952773620074016\n",
      "episode 276/600\n",
      "p1 0.8216000000000002 p0 0.04459999999999995\n",
      "trajectorySteps 27\n",
      "[[2 3 2 1 1]\n",
      " [3 2 1 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "🔄⬅️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0367 -0.0263 -0.0089 -0.0105]\n",
      " [-0.0472 -0.0126 -0.0192 -0.0203 -0.0085]\n",
      " [-0.024  -0.0524  0.0458 -0.018  -0.0059]\n",
      " [-0.0405  0.0694  0.3507  0.0328 -0.0122]\n",
      " [-0.0285 -0.0054  0.0846 -0.0036 -0.0032]]\n",
      "mean_state_value 0.007097785917472779\n",
      "episode 277/600\n",
      "p1 0.8224000000000002 p0 0.04439999999999995\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [3 1 2 0 0]\n",
      " [1 1 2 0 0]]\n",
      "🔄⬅️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0365 -0.0262 -0.0089 -0.0105]\n",
      " [-0.047  -0.0125 -0.0191 -0.0202 -0.0085]\n",
      " [-0.024  -0.0522  0.0459 -0.0179 -0.0059]\n",
      " [-0.0407  0.0691  0.3515  0.0328 -0.0121]\n",
      " [-0.0284 -0.0053  0.0852 -0.0035 -0.0032]]\n",
      "mean_state_value 0.0071917609518651945\n",
      "episode 278/600\n",
      "p1 0.8232000000000002 p0 0.04419999999999995\n",
      "trajectorySteps 25\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3  1  0  0  0]\n",
      " [ 5  1  2  0  0]\n",
      " [13  0  0  0  0]]\n",
      "🔄⬅️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.022  -0.0364 -0.0261 -0.0089 -0.0104]\n",
      " [-0.0468 -0.0125 -0.019  -0.0201 -0.0084]\n",
      " [-0.024  -0.052   0.046  -0.0178 -0.0059]\n",
      " [-0.041   0.0703  0.3537  0.0328 -0.0121]\n",
      " [-0.0284 -0.0053  0.0853 -0.0035 -0.0031]]\n",
      "mean_state_value 0.007371898728438051\n",
      "episode 279/600\n",
      "p1 0.8240000000000002 p0 0.04399999999999995\n",
      "trajectorySteps 224\n",
      "[[184   8   1   1   2]\n",
      " [ 13   0   0   0   1]\n",
      " [  1   0   0   0   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   1   3   1   4]]\n",
      "⬇️🔄➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.0364 -0.0259 -0.0089 -0.0104]\n",
      " [-0.0467 -0.0124 -0.019  -0.02   -0.0084]\n",
      " [-0.024  -0.0518  0.046  -0.0177 -0.0059]\n",
      " [-0.0409  0.0704  0.3558  0.0329 -0.012 ]\n",
      " [-0.0283 -0.0053  0.0861 -0.0036 -0.0032]]\n",
      "mean_state_value 0.007503501415907621\n",
      "episode 280/600\n",
      "p1 0.8248000000000002 p0 0.04379999999999995\n",
      "trajectorySteps 41\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [33  1  1  0  0]]\n",
      "⬇️🔄➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.0362 -0.0258 -0.0089 -0.0104]\n",
      " [-0.0465 -0.0124 -0.0189 -0.0199 -0.0084]\n",
      " [-0.0239 -0.0516  0.0461 -0.0176 -0.0059]\n",
      " [-0.0407  0.0706  0.3579  0.0329 -0.012 ]\n",
      " [-0.0293 -0.0052  0.0873 -0.0035 -0.0032]]\n",
      "mean_state_value 0.007667702746134934\n",
      "episode 281/600\n",
      "p1 0.8256000000000002 p0 0.04359999999999995\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 3 0 0 0]\n",
      " [0 2 2 0 0]\n",
      " [0 1 1 0 0]]\n",
      "⬇️🔄➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0361 -0.0257 -0.0089 -0.0104]\n",
      " [-0.0463 -0.0123 -0.0188 -0.0199 -0.0084]\n",
      " [-0.0239 -0.0522  0.0462 -0.0176 -0.0058]\n",
      " [-0.0406  0.0702  0.3597  0.033  -0.0119]\n",
      " [-0.0292 -0.005   0.0885 -0.0035 -0.0032]]\n",
      "mean_state_value 0.0078092492490572565\n",
      "episode 282/600\n",
      "p1 0.8264000000000002 p0 0.04339999999999995\n",
      "trajectorySteps 107\n",
      "[[10 15  3  2  1]\n",
      " [ 7  2  0  1  2]\n",
      " [ 1  0  0  4 29]\n",
      " [ 0  0  2  2 26]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.0365 -0.0257 -0.0088 -0.0103]\n",
      " [-0.0467 -0.0124 -0.0187 -0.0198 -0.0084]\n",
      " [-0.0239 -0.052   0.0463 -0.0175 -0.0059]\n",
      " [-0.0404  0.0704  0.3605  0.0336 -0.0128]\n",
      " [-0.029  -0.005   0.0886 -0.0035 -0.0031]]\n",
      "mean_state_value 0.007845720871342825\n",
      "episode 283/600\n",
      "p1 0.8272000000000002 p0 0.04319999999999995\n",
      "trajectorySteps 23\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 1 0 0 0]\n",
      " [7 1 2 0 0]\n",
      " [5 2 0 0 0]]\n",
      "➡️⬅️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.0364 -0.0256 -0.0088 -0.0103]\n",
      " [-0.0465 -0.0123 -0.0186 -0.0197 -0.0083]\n",
      " [-0.0238 -0.0518  0.0464 -0.0174 -0.0059]\n",
      " [-0.0405  0.0716  0.3627  0.0337 -0.0127]\n",
      " [-0.0294 -0.0058  0.0887 -0.0035 -0.0031]]\n",
      "mean_state_value 0.007989486087555706\n",
      "episode 284/600\n",
      "p1 0.8280000000000002 p0 0.04299999999999995\n",
      "trajectorySteps 58\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [26  1  0  0  0]\n",
      " [24  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️⬅️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0362 -0.0255 -0.0088 -0.0102]\n",
      " [-0.0463 -0.0123 -0.0185 -0.0196 -0.0083]\n",
      " [-0.0244 -0.0516  0.0464 -0.0173 -0.0059]\n",
      " [-0.041   0.0729  0.3648  0.0337 -0.0127]\n",
      " [-0.0292 -0.0058  0.0888 -0.0034 -0.0031]]\n",
      "mean_state_value 0.008144681003848938\n",
      "episode 285/600\n",
      "p1 0.8288000000000002 p0 0.04279999999999995\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [3 0 2 0 0]\n",
      " [4 1 1 0 0]]\n",
      "➡️⬅️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0361 -0.0253 -0.0088 -0.0102]\n",
      " [-0.0462 -0.0122 -0.0185 -0.0195 -0.0083]\n",
      " [-0.0244 -0.0513  0.0465 -0.0173 -0.0059]\n",
      " [-0.0412  0.073   0.367   0.0338 -0.0126]\n",
      " [-0.0296 -0.0057  0.0901 -0.0034 -0.0031]]\n",
      "mean_state_value 0.008320290121205743\n",
      "episode 286/600\n",
      "p1 0.8296000000000002 p0 0.04259999999999995\n",
      "trajectorySteps 17\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [6 1 2 0 0]\n",
      " [5 0 0 0 0]]\n",
      "➡️⬅️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0227 -0.0359 -0.0252 -0.0087 -0.0102]\n",
      " [-0.046  -0.0122 -0.0184 -0.0194 -0.0083]\n",
      " [-0.0244 -0.0511  0.0466 -0.0172 -0.0059]\n",
      " [-0.0414  0.0742  0.3691  0.0338 -0.0126]\n",
      " [-0.0295 -0.0056  0.0902 -0.0034 -0.0031]]\n",
      "mean_state_value 0.008511509700732332\n",
      "episode 287/600\n",
      "p1 0.8304000000000002 p0 0.04239999999999995\n",
      "trajectorySteps 196\n",
      "[[45 48  1  1  1]\n",
      " [ 4  3  0  3 32]\n",
      " [ 1  0  0  4 46]\n",
      " [ 0  0  2  1  4]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬆️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.023  -0.0373 -0.0251 -0.0087 -0.0101]\n",
      " [-0.0458 -0.0121 -0.0183 -0.0193 -0.0083]\n",
      " [-0.0243 -0.0509  0.0467 -0.0171 -0.0061]\n",
      " [-0.0413  0.0744  0.3713  0.035  -0.0129]\n",
      " [-0.0294 -0.0056  0.0903 -0.0034 -0.0031]]\n",
      "mean_state_value 0.00861270193719798\n",
      "episode 288/600\n",
      "p1 0.8312000000000002 p0 0.042199999999999946\n",
      "trajectorySteps 26\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  1  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [18  1  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬆️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.023  -0.0372 -0.025  -0.0087 -0.0101]\n",
      " [-0.0456 -0.0121 -0.0182 -0.0192 -0.0083]\n",
      " [-0.0243 -0.0507  0.0468 -0.017  -0.0061]\n",
      " [-0.0411  0.0756  0.3734  0.0351 -0.0129]\n",
      " [-0.0302 -0.006   0.0904 -0.0033 -0.0031]]\n",
      "mean_state_value 0.008770630501194344\n",
      "episode 289/600\n",
      "p1 0.8320000000000002 p0 0.04199999999999995\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬆️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.037  -0.0249 -0.0087 -0.01  ]\n",
      " [-0.0455 -0.0121 -0.0181 -0.0192 -0.0083]\n",
      " [-0.0243 -0.0504  0.0468 -0.017  -0.006 ]\n",
      " [-0.0413  0.0769  0.3756  0.0351 -0.0128]\n",
      " [-0.03   -0.0059  0.0905 -0.0033 -0.0031]]\n",
      "mean_state_value 0.008961787356895706\n",
      "episode 290/600\n",
      "p1 0.8328000000000002 p0 0.04179999999999995\n",
      "trajectorySteps 119\n",
      "[[ 0  1  1  2 21]\n",
      " [ 0  1  0  2 21]\n",
      " [14 11  0  5 18]\n",
      " [ 0  0  2  0 17]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.0369 -0.0248 -0.0086 -0.0101]\n",
      " [-0.0453 -0.012  -0.018  -0.0191 -0.0083]\n",
      " [-0.0244 -0.0507  0.0469 -0.0169 -0.006 ]\n",
      " [-0.0412  0.077   0.3777  0.0352 -0.0128]\n",
      " [-0.0299 -0.0059  0.0917 -0.0032 -0.003 ]]\n",
      "mean_state_value 0.00914475845898635\n",
      "episode 291/600\n",
      "p1 0.8336000000000002 p0 0.04159999999999995\n",
      "trajectorySteps 33\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [25  1  0  0  0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0367 -0.0247 -0.0086 -0.01  ]\n",
      " [-0.0451 -0.012  -0.018  -0.019  -0.0083]\n",
      " [-0.0243 -0.0505  0.047  -0.0168 -0.006 ]\n",
      " [-0.0411  0.0777  0.3787  0.0352 -0.0127]\n",
      " [-0.0306 -0.0063  0.0919 -0.0032 -0.003 ]]\n",
      "mean_state_value 0.00923510665514226\n",
      "episode 292/600\n",
      "p1 0.8344000000000003 p0 0.04139999999999995\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 3 0 0 0]\n",
      " [1 2 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0227 -0.0366 -0.0246 -0.0086 -0.01  ]\n",
      " [-0.0449 -0.0119 -0.0179 -0.0189 -0.0082]\n",
      " [-0.0247 -0.0507  0.0471 -0.0167 -0.006 ]\n",
      " [-0.0409  0.0786  0.3808  0.0353 -0.0127]\n",
      " [-0.0304 -0.0062  0.092  -0.0031 -0.003 ]]\n",
      "mean_state_value 0.009397589965533254\n",
      "episode 293/600\n",
      "p1 0.8352000000000002 p0 0.041199999999999945\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 5 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬅️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0227 -0.0364 -0.0245 -0.0086 -0.0099]\n",
      " [-0.0447 -0.0119 -0.0178 -0.0188 -0.0082]\n",
      " [-0.0247 -0.0509  0.0484 -0.0167 -0.006 ]\n",
      " [-0.0407  0.0787  0.383   0.0353 -0.0126]\n",
      " [-0.0303 -0.0062  0.0921 -0.0031 -0.003 ]]\n",
      "mean_state_value 0.009588834028836262\n",
      "episode 294/600\n",
      "p1 0.8360000000000002 p0 0.040999999999999946\n",
      "trajectorySteps 341\n",
      "[[  0   1   1  43  42]\n",
      " [ 16   1   1   3   0]\n",
      " [203   8   0   1   2]\n",
      " [  7   0   2   0   2]\n",
      " [  1   0   3   2   2]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0226 -0.0363 -0.0244 -0.0086 -0.0099]\n",
      " [-0.0447 -0.0118 -0.0177 -0.0191 -0.0082]\n",
      " [-0.0251 -0.0511  0.0484 -0.0166 -0.006 ]\n",
      " [-0.0406  0.0788  0.3839  0.0354 -0.0126]\n",
      " [-0.0302 -0.0062  0.0928 -0.003  -0.003 ]]\n",
      "mean_state_value 0.00966705091736983\n",
      "episode 295/600\n",
      "p1 0.8368000000000002 p0 0.04079999999999995\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [7 1 2 0 0]\n",
      " [4 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0226 -0.0361 -0.0243 -0.0086 -0.0099]\n",
      " [-0.0445 -0.0118 -0.0176 -0.019  -0.0081]\n",
      " [-0.0251 -0.0509  0.0485 -0.0165 -0.0059]\n",
      " [-0.0408  0.0801  0.3861  0.0354 -0.0125]\n",
      " [-0.0303 -0.0061  0.0929 -0.003  -0.003 ]]\n",
      "mean_state_value 0.009856277612591278\n",
      "episode 296/600\n",
      "p1 0.8376000000000002 p0 0.04059999999999995\n",
      "trajectorySteps 109\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 6  0  2  0  0]\n",
      " [96  2  1  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0225 -0.036  -0.0242 -0.0086 -0.0098]\n",
      " [-0.0443 -0.0117 -0.0175 -0.0189 -0.0081]\n",
      " [-0.025  -0.0506  0.0486 -0.0164 -0.0059]\n",
      " [-0.0406  0.0802  0.3883  0.0355 -0.0124]\n",
      " [-0.0318 -0.006   0.0941 -0.0029 -0.003 ]]\n",
      "mean_state_value 0.010006322543049916\n",
      "episode 297/600\n",
      "p1 0.8384000000000003 p0 0.04039999999999995\n",
      "trajectorySteps 35\n",
      "[[20  1  1  1  1]\n",
      " [ 2  0  0  0  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.023  -0.0358 -0.024  -0.0085 -0.0098]\n",
      " [-0.0441 -0.0117 -0.0175 -0.0189 -0.0081]\n",
      " [-0.025  -0.0504  0.0487 -0.0164 -0.0059]\n",
      " [-0.0405  0.0804  0.3892  0.0355 -0.0124]\n",
      " [-0.0317 -0.0059  0.0948 -0.0028 -0.0029]]\n",
      "mean_state_value 0.010128825705409707\n",
      "episode 298/600\n",
      "p1 0.8392000000000002 p0 0.040199999999999944\n",
      "trajectorySteps 15\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [6 1 2 0 0]\n",
      " [5 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.0357 -0.0239 -0.0085 -0.0098]\n",
      " [-0.0439 -0.0117 -0.0174 -0.0188 -0.0081]\n",
      " [-0.0249 -0.0502  0.0488 -0.0163 -0.0059]\n",
      " [-0.0407  0.0816  0.3914  0.0355 -0.0123]\n",
      " [-0.0322 -0.0059  0.0949 -0.0028 -0.0029]]\n",
      "mean_state_value 0.010302772984236373\n",
      "episode 299/600\n",
      "p1 0.8400000000000002 p0 0.039999999999999945\n",
      "trajectorySteps 44\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3  1  0  0  0]\n",
      " [35  3  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.0355 -0.0238 -0.0085 -0.0097]\n",
      " [-0.0437 -0.0116 -0.0173 -0.0187 -0.008 ]\n",
      " [-0.0248 -0.05    0.0488 -0.0162 -0.0058]\n",
      " [-0.0418  0.0829  0.3936  0.0356 -0.0123]\n",
      " [-0.0321 -0.0059  0.095  -0.0028 -0.0029]]\n",
      "mean_state_value 0.010465565602947728\n",
      "episode 300/600\n",
      "p1 0.8408000000000002 p0 0.03979999999999995\n",
      "trajectorySteps 45\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [20  0  2  0  0]\n",
      " [16  1  1  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0354 -0.0237 -0.0085 -0.0097]\n",
      " [-0.0435 -0.0116 -0.0172 -0.0186 -0.008 ]\n",
      " [-0.0248 -0.0497  0.0489 -0.0161 -0.0058]\n",
      " [-0.0417  0.083   0.3952  0.0356 -0.0122]\n",
      " [-0.0325 -0.0057  0.0963 -0.0028 -0.0029]]\n",
      "mean_state_value 0.010632460934455405\n",
      "episode 301/600\n",
      "p1 0.8416000000000002 p0 0.03959999999999995\n",
      "trajectorySteps 21\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [9 1 0 0 0]\n",
      " [7 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0352 -0.0236 -0.0084 -0.0097]\n",
      " [-0.0434 -0.0115 -0.0171 -0.0185 -0.008 ]\n",
      " [-0.0249 -0.0495  0.049  -0.016  -0.0058]\n",
      " [-0.042   0.0843  0.3974  0.0357 -0.0121]\n",
      " [-0.0324 -0.0057  0.0964 -0.0027 -0.0029]]\n",
      "mean_state_value 0.010821685951272657\n",
      "episode 302/600\n",
      "p1 0.8424000000000003 p0 0.03939999999999995\n",
      "trajectorySteps 62\n",
      "[[39  1  1  1  1]\n",
      " [ 7  0  0  0  1]\n",
      " [ 2  0  0  0  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  2]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0351 -0.0235 -0.0084 -0.0096]\n",
      " [-0.0432 -0.0115 -0.017  -0.0184 -0.008 ]\n",
      " [-0.025  -0.0493  0.0491 -0.016  -0.0058]\n",
      " [-0.0418  0.0845  0.3996  0.0357 -0.0121]\n",
      " [-0.0323 -0.0056  0.0977 -0.0026 -0.0029]]\n",
      "mean_state_value 0.01102831411049662\n",
      "episode 303/600\n",
      "p1 0.8432000000000002 p0 0.03919999999999994\n",
      "trajectorySteps 60\n",
      "[[ 0  1  1  1  1]\n",
      " [ 1  1  0  0  3]\n",
      " [ 1  0  0  0 22]\n",
      " [ 0  0  2  1 22]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0349 -0.0234 -0.0084 -0.0096]\n",
      " [-0.0434 -0.0114 -0.017  -0.0183 -0.0079]\n",
      " [-0.0251 -0.049   0.0492 -0.0159 -0.0059]\n",
      " [-0.0416  0.0846  0.4018  0.0358 -0.0125]\n",
      " [-0.0321 -0.0056  0.0989 -0.0025 -0.0029]]\n",
      "mean_state_value 0.011200044548889253\n",
      "episode 304/600\n",
      "p1 0.8440000000000002 p0 0.038999999999999944\n",
      "trajectorySteps 69\n",
      "[[ 0  1  1  1  1]\n",
      " [ 0  1  0  0  2]\n",
      " [11 11  0  4 28]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  2  2  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0227 -0.0347 -0.0233 -0.0084 -0.0095]\n",
      " [-0.0432 -0.0114 -0.0169 -0.0182 -0.0079]\n",
      " [-0.0252 -0.0493  0.0493 -0.0158 -0.0059]\n",
      " [-0.0414  0.0847  0.404   0.0358 -0.0124]\n",
      " [-0.032  -0.0056  0.1002 -0.0024 -0.0029]]\n",
      "mean_state_value 0.011394567184624787\n",
      "episode 305/600\n",
      "p1 0.8448000000000002 p0 0.038799999999999946\n",
      "trajectorySteps 25\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2 20  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0227 -0.0346 -0.0232 -0.0083 -0.0095]\n",
      " [-0.043  -0.0113 -0.0168 -0.0182 -0.0079]\n",
      " [-0.0252 -0.0494  0.0506 -0.0157 -0.0059]\n",
      " [-0.0413  0.0848  0.4062  0.0359 -0.0124]\n",
      " [-0.0319 -0.0055  0.1003 -0.0023 -0.0029]]\n",
      "mean_state_value 0.011596263207575286\n",
      "episode 306/600\n",
      "p1 0.8456000000000002 p0 0.03859999999999995\n",
      "trajectorySteps 80\n",
      "[[ 0  1  1  2  2]\n",
      " [ 0  2  0  0  1]\n",
      " [30 30  0  0  3]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  2  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0226 -0.0344 -0.0231 -0.0083 -0.0095]\n",
      " [-0.0428 -0.0113 -0.0167 -0.0181 -0.0078]\n",
      " [-0.0251 -0.0506  0.0506 -0.0157 -0.0059]\n",
      " [-0.0411  0.085   0.408   0.0359 -0.0123]\n",
      " [-0.0318 -0.0055  0.1016 -0.0022 -0.0029]]\n",
      "mean_state_value 0.01174083545139368\n",
      "episode 307/600\n",
      "p1 0.8464000000000003 p0 0.03839999999999995\n",
      "trajectorySteps 23\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 3 16  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0225 -0.0343 -0.023  -0.0083 -0.0094]\n",
      " [-0.0426 -0.0113 -0.0166 -0.018  -0.0078]\n",
      " [-0.0251 -0.0508  0.0519 -0.0156 -0.0058]\n",
      " [-0.0409  0.0851  0.4102  0.036  -0.0122]\n",
      " [-0.0316 -0.0054  0.1017 -0.0022 -0.0029]]\n",
      "mean_state_value 0.011941513232529425\n",
      "episode 308/600\n",
      "p1 0.8472000000000002 p0 0.03819999999999994\n",
      "trajectorySteps 60\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [28 27  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬇️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0225 -0.0341 -0.0229 -0.0083 -0.0094]\n",
      " [-0.0424 -0.0112 -0.0165 -0.0179 -0.0078]\n",
      " [-0.0251 -0.0512  0.0532 -0.0155 -0.0058]\n",
      " [-0.0407  0.0852  0.4124  0.036  -0.0122]\n",
      " [-0.0315 -0.0054  0.1019 -0.0022 -0.0028]]\n",
      "mean_state_value 0.012127662546485993\n",
      "episode 309/600\n",
      "p1 0.8480000000000002 p0 0.037999999999999944\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 2 1 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0224 -0.034  -0.0228 -0.0082 -0.0093]\n",
      " [-0.0423 -0.0112 -0.0165 -0.0178 -0.0078]\n",
      " [-0.0252 -0.0515  0.0546 -0.0154 -0.0058]\n",
      " [-0.0409  0.0854  0.4146  0.036  -0.0121]\n",
      " [-0.0314 -0.0054  0.102  -0.0022 -0.0028]]\n",
      "mean_state_value 0.012307470471009756\n",
      "episode 310/600\n",
      "p1 0.8488000000000002 p0 0.037799999999999945\n",
      "trajectorySteps 44\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  2  1  0]\n",
      " [ 2  1  1  2 28]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0224 -0.0338 -0.0226 -0.0082 -0.0093]\n",
      " [-0.0421 -0.0111 -0.0168 -0.0177 -0.0077]\n",
      " [-0.0255 -0.0516  0.0543 -0.0154 -0.0059]\n",
      " [-0.0407  0.0855  0.4169  0.0361 -0.012 ]\n",
      " [-0.0313 -0.0053  0.1033 -0.002  -0.0028]]\n",
      "mean_state_value 0.012467742450809569\n",
      "episode 311/600\n",
      "p1 0.8496000000000002 p0 0.037599999999999946\n",
      "trajectorySteps 25\n",
      "[[0 1 2 1 1]\n",
      " [1 1 1 0 1]\n",
      " [1 0 0 6 6]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0223 -0.0337 -0.0225 -0.0082 -0.0093]\n",
      " [-0.0422 -0.0114 -0.0167 -0.0176 -0.0077]\n",
      " [-0.0255 -0.0513  0.0543 -0.0157 -0.0059]\n",
      " [-0.0406  0.0856  0.4191  0.0374 -0.012 ]\n",
      " [-0.0312 -0.0053  0.1034 -0.002  -0.0028]]\n",
      "mean_state_value 0.012638241422122762\n",
      "episode 312/600\n",
      "p1 0.8504000000000003 p0 0.03739999999999995\n",
      "trajectorySteps 70\n",
      "[[ 0  1  1  1  2]\n",
      " [ 1  1  0  0  1]\n",
      " [ 4  3  0  1 46]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  2  2]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0223 -0.0335 -0.0224 -0.0082 -0.0093]\n",
      " [-0.042  -0.0114 -0.0166 -0.0175 -0.0077]\n",
      " [-0.0257 -0.0515  0.0544 -0.0156 -0.006 ]\n",
      " [-0.0404  0.0858  0.4213  0.0374 -0.0119]\n",
      " [-0.031  -0.0052  0.1047 -0.0019 -0.0028]]\n",
      "mean_state_value 0.012829709175814668\n",
      "episode 313/600\n",
      "p1 0.8512000000000002 p0 0.03719999999999994\n",
      "trajectorySteps 29\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [13  0  0  0  0]\n",
      " [13  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.0334 -0.0223 -0.0082 -0.0092]\n",
      " [-0.0419 -0.0114 -0.0165 -0.0174 -0.0077]\n",
      " [-0.0257 -0.0512  0.0545 -0.0155 -0.0059]\n",
      " [-0.0409  0.0871  0.4236  0.0375 -0.0119]\n",
      " [-0.0309 -0.0052  0.1048 -0.0018 -0.0028]]\n",
      "mean_state_value 0.013018253048733433\n",
      "episode 314/600\n",
      "p1 0.8520000000000002 p0 0.03699999999999994\n",
      "trajectorySteps 35\n",
      "[[ 0  1  1  1  1]\n",
      " [ 0  1  0  0  1]\n",
      " [11  9  0  0  1]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.0332 -0.0222 -0.0082 -0.0092]\n",
      " [-0.0417 -0.0113 -0.0164 -0.0174 -0.0076]\n",
      " [-0.0258 -0.0514  0.0546 -0.0154 -0.0059]\n",
      " [-0.0407  0.0872  0.4258  0.0375 -0.0118]\n",
      " [-0.0308 -0.0052  0.1061 -0.0017 -0.0028]]\n",
      "mean_state_value 0.01321662247818893\n",
      "episode 315/600\n",
      "p1 0.8528000000000002 p0 0.036799999999999944\n",
      "trajectorySteps 54\n",
      "[[ 0  1  1  1  1]\n",
      " [ 0  1  0  1  1]\n",
      " [18 15  0  1  1]\n",
      " [ 3  0  2  0  1]\n",
      " [ 1  0  2  1  2]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "➡️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0331 -0.0221 -0.0081 -0.0091]\n",
      " [-0.0415 -0.0113 -0.0163 -0.0173 -0.0076]\n",
      " [-0.026  -0.0517  0.0547 -0.0153 -0.0059]\n",
      " [-0.0405  0.0873  0.428   0.0376 -0.0118]\n",
      " [-0.0307 -0.0051  0.1074 -0.0017 -0.0028]]\n",
      "mean_state_value 0.013403356138305822\n",
      "episode 316/600\n",
      "p1 0.8536000000000002 p0 0.036599999999999945\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 2 1 0 0]\n",
      " [1 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.022  -0.0329 -0.022  -0.0081 -0.0091]\n",
      " [-0.0413 -0.0112 -0.0162 -0.0172 -0.0076]\n",
      " [-0.0259 -0.052   0.0554 -0.0153 -0.0059]\n",
      " [-0.0404  0.0875  0.4291  0.0376 -0.0117]\n",
      " [-0.0305 -0.0051  0.1076 -0.0017 -0.0028]]\n",
      "mean_state_value 0.013533203751194947\n",
      "episode 317/600\n",
      "p1 0.8544000000000003 p0 0.036399999999999946\n",
      "trajectorySteps 32\n",
      "[[0 1 2 1 1]\n",
      " [0 1 0 0 1]\n",
      " [4 3 0 0 2]\n",
      " [5 0 2 0 2]\n",
      " [4 0 1 1 1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.022  -0.0328 -0.0219 -0.0081 -0.0091]\n",
      " [-0.0411 -0.0112 -0.0162 -0.0171 -0.0075]\n",
      " [-0.026  -0.0521  0.0555 -0.0152 -0.0059]\n",
      " [-0.0402  0.0876  0.4313  0.0377 -0.0116]\n",
      " [-0.0304 -0.005   0.1089 -0.0015 -0.0027]]\n",
      "mean_state_value 0.013736250061827358\n",
      "episode 318/600\n",
      "p1 0.8552000000000002 p0 0.03619999999999994\n",
      "trajectorySteps 22\n",
      "[[6 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 2]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0219 -0.0327 -0.0218 -0.0081 -0.009 ]\n",
      " [-0.0409 -0.0111 -0.0161 -0.017  -0.0075]\n",
      " [-0.0259 -0.0518  0.0556 -0.0151 -0.0058]\n",
      " [-0.04    0.0877  0.4331  0.0377 -0.0116]\n",
      " [-0.0303 -0.005   0.1102 -0.0014 -0.0027]]\n",
      "mean_state_value 0.013943369505437501\n",
      "episode 319/600\n",
      "p1 0.8560000000000002 p0 0.03599999999999994\n",
      "trajectorySteps 57\n",
      "[[ 1  1  1  1  1]\n",
      " [20  0  0  0  1]\n",
      " [22  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  2  2]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0219 -0.0325 -0.0217 -0.008  -0.009 ]\n",
      " [-0.0407 -0.0111 -0.016  -0.0169 -0.0075]\n",
      " [-0.0257 -0.0516  0.0557 -0.015  -0.0058]\n",
      " [-0.0398  0.0879  0.4354  0.0378 -0.0116]\n",
      " [-0.0302 -0.005   0.1115 -0.0013 -0.0028]]\n",
      "mean_state_value 0.014168890956915021\n",
      "episode 320/600\n",
      "p1 0.8568000000000002 p0 0.03579999999999994\n",
      "trajectorySteps 18\n",
      "[[0 1 1 1 1]\n",
      " [1 1 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 2]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0218 -0.0324 -0.0216 -0.008  -0.009 ]\n",
      " [-0.0409 -0.011  -0.0159 -0.0168 -0.0075]\n",
      " [-0.0261 -0.0513  0.0558 -0.0149 -0.0058]\n",
      " [-0.0396  0.088   0.4376  0.0378 -0.0115]\n",
      " [-0.0301 -0.0049  0.1128 -0.0012 -0.0028]]\n",
      "mean_state_value 0.014362613883664508\n",
      "episode 321/600\n",
      "p1 0.8576000000000003 p0 0.035599999999999944\n",
      "trajectorySteps 15\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 6 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0218 -0.0322 -0.0215 -0.008  -0.0089]\n",
      " [-0.0407 -0.011  -0.0158 -0.0167 -0.0074]\n",
      " [-0.0261 -0.0517  0.0571 -0.0149 -0.0058]\n",
      " [-0.0395  0.0881  0.4399  0.0379 -0.0115]\n",
      " [-0.0299 -0.0049  0.113  -0.0011 -0.0028]]\n",
      "mean_state_value 0.014559412073863784\n",
      "episode 322/600\n",
      "p1 0.8584000000000003 p0 0.035399999999999945\n",
      "trajectorySteps 35\n",
      "[[12  1  1  1  2]\n",
      " [ 4  0  0  0  2]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  2  2  3]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0217 -0.0321 -0.0214 -0.008  -0.0089]\n",
      " [-0.0405 -0.0109 -0.0157 -0.0167 -0.0074]\n",
      " [-0.026  -0.0514  0.0572 -0.0148 -0.0057]\n",
      " [-0.0393  0.0883  0.4422  0.0379 -0.0114]\n",
      " [-0.0298 -0.0048  0.1143 -0.0011 -0.0027]]\n",
      "mean_state_value 0.014778832973577991\n",
      "episode 323/600\n",
      "p1 0.8592000000000002 p0 0.03519999999999994\n",
      "trajectorySteps 16\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0216 -0.0319 -0.0213 -0.0079 -0.0088]\n",
      " [-0.0403 -0.0109 -0.0156 -0.0166 -0.0074]\n",
      " [-0.026  -0.0511  0.0573 -0.0147 -0.0057]\n",
      " [-0.0391  0.0884  0.4444  0.038  -0.0113]\n",
      " [-0.0297 -0.0048  0.1156 -0.001  -0.0027]]\n",
      "mean_state_value 0.015005156670268114\n",
      "episode 324/600\n",
      "p1 0.8600000000000002 p0 0.03499999999999994\n",
      "trajectorySteps 33\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [13  0  2  0  0]\n",
      " [15  1  1  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0216 -0.0318 -0.0212 -0.0079 -0.0088]\n",
      " [-0.0401 -0.0108 -0.0156 -0.0165 -0.0074]\n",
      " [-0.0259 -0.0509  0.0574 -0.0146 -0.0057]\n",
      " [-0.039   0.0885  0.4467  0.038  -0.0113]\n",
      " [-0.0301 -0.0046  0.1169 -0.0009 -0.0027]]\n",
      "mean_state_value 0.015209981313239269\n",
      "episode 325/600\n",
      "p1 0.8608000000000002 p0 0.03479999999999994\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 2 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0215 -0.0316 -0.0211 -0.0079 -0.0088]\n",
      " [-0.0399 -0.0108 -0.0155 -0.0164 -0.0073]\n",
      " [-0.0259 -0.051   0.0588 -0.0145 -0.0057]\n",
      " [-0.0388  0.0887  0.4483  0.038  -0.0112]\n",
      " [-0.0299 -0.0046  0.117  -0.0009 -0.0027]]\n",
      "mean_state_value 0.015394497862614133\n",
      "episode 326/600\n",
      "p1 0.8616000000000003 p0 0.03459999999999994\n",
      "trajectorySteps 30\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [14  2  0  0  0]\n",
      " [10  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0215 -0.0315 -0.021  -0.0079 -0.0087]\n",
      " [-0.0397 -0.0108 -0.0154 -0.0163 -0.0073]\n",
      " [-0.0262 -0.0509  0.0589 -0.0145 -0.0057]\n",
      " [-0.039   0.09    0.4506  0.0381 -0.0112]\n",
      " [-0.0298 -0.0046  0.1172 -0.0009 -0.0027]]\n",
      "mean_state_value 0.015578308997419918\n",
      "episode 327/600\n",
      "p1 0.8624000000000003 p0 0.034399999999999945\n",
      "trajectorySteps 38\n",
      "[[20  1  1  2  2]\n",
      " [ 1  0  0  0  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  1  1  2]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0214 -0.0313 -0.0209 -0.0078 -0.0087]\n",
      " [-0.0395 -0.0107 -0.0153 -0.0162 -0.0073]\n",
      " [-0.0261 -0.0507  0.059  -0.0144 -0.0056]\n",
      " [-0.0388  0.0902  0.4529  0.0381 -0.0111]\n",
      " [-0.0297 -0.0045  0.1185 -0.0008 -0.0027]]\n",
      "mean_state_value 0.015806456646694937\n",
      "episode 328/600\n",
      "p1 0.8632000000000002 p0 0.03419999999999994\n",
      "trajectorySteps 32\n",
      "[[14  1  1  2  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 2  2  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️🔄➡️⬇️\n",
      "🔄⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0214 -0.0311 -0.0208 -0.0078 -0.0086]\n",
      " [-0.0393 -0.0107 -0.0152 -0.0161 -0.0072]\n",
      " [-0.0261 -0.0504  0.059  -0.0143 -0.0056]\n",
      " [-0.0387  0.0903  0.4551  0.0382 -0.011 ]\n",
      " [-0.0296 -0.0045  0.1199 -0.0006 -0.0027]]\n",
      "mean_state_value 0.01603078757346906\n",
      "episode 329/600\n",
      "p1 0.8640000000000002 p0 0.03399999999999994\n",
      "trajectorySteps 28\n",
      "[[0 1 7 0 0]\n",
      " [5 1 1 1 0]\n",
      " [3 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0213 -0.031  -0.0211 -0.0078 -0.0086]\n",
      " [-0.0395 -0.0106 -0.0151 -0.016  -0.0072]\n",
      " [-0.026  -0.0502  0.0591 -0.0142 -0.0056]\n",
      " [-0.0385  0.0904  0.4574  0.0382 -0.011 ]\n",
      " [-0.0294 -0.0044  0.1212 -0.0005 -0.0026]]\n",
      "mean_state_value 0.016230249680844747\n",
      "episode 330/600\n",
      "p1 0.8648000000000002 p0 0.03379999999999994\n",
      "trajectorySteps 33\n",
      "[[20  1  1  1  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 2  0  0  0  1]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.130e-02 -3.080e-02 -2.100e-02 -7.800e-03 -8.600e-03]\n",
      " [-3.930e-02 -1.060e-02 -1.510e-02 -1.590e-02 -7.200e-03]\n",
      " [-2.590e-02 -4.990e-02  5.920e-02 -1.420e-02 -5.700e-03]\n",
      " [-3.830e-02  9.060e-02  4.597e-01  3.960e-02 -1.120e-02]\n",
      " [-2.930e-02 -4.400e-03  1.213e-01 -4.000e-04 -2.600e-03]]\n",
      "mean_state_value 0.01643632131824874\n",
      "episode 331/600\n",
      "p1 0.8656000000000003 p0 0.03359999999999994\n",
      "trajectorySteps 31\n",
      "[[7 1 1 1 1]\n",
      " [8 0 0 0 1]\n",
      " [2 0 1 2 3]\n",
      " [1 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.120e-02 -3.070e-02 -2.090e-02 -7.800e-03 -8.500e-03]\n",
      " [-3.910e-02 -1.050e-02 -1.500e-02 -1.590e-02 -7.200e-03]\n",
      " [-2.590e-02 -4.970e-02  6.060e-02 -1.440e-02 -5.800e-03]\n",
      " [-3.810e-02  9.070e-02  4.620e-01  3.960e-02 -1.120e-02]\n",
      " [-2.920e-02 -4.400e-03  1.215e-01 -4.000e-04 -2.600e-03]]\n",
      "mean_state_value 0.016645575619860478\n",
      "episode 332/600\n",
      "p1 0.8664000000000003 p0 0.033399999999999944\n",
      "trajectorySteps 48\n",
      "[[ 9  1  1  2  3]\n",
      " [11  0  0  0  3]\n",
      " [ 3  1  0  0  8]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.110e-02 -3.050e-02 -2.080e-02 -7.800e-03 -8.500e-03]\n",
      " [-3.900e-02 -1.050e-02 -1.490e-02 -1.580e-02 -7.100e-03]\n",
      " [-2.580e-02 -4.940e-02  6.070e-02 -1.440e-02 -5.700e-03]\n",
      " [-3.790e-02  9.080e-02  4.631e-01  3.970e-02 -1.110e-02]\n",
      " [-2.910e-02 -4.300e-03  1.221e-01 -3.000e-04 -2.600e-03]]\n",
      "mean_state_value 0.01679690880862615\n",
      "episode 333/600\n",
      "p1 0.8672000000000002 p0 0.03319999999999994\n",
      "trajectorySteps 89\n",
      "[[ 8  1 25 26  1]\n",
      " [10  0  0  1  1]\n",
      " [ 2  0  0  3  7]\n",
      " [ 1  0  2  1  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.110e-02 -3.040e-02 -2.080e-02 -7.800e-03 -8.500e-03]\n",
      " [-3.890e-02 -1.040e-02 -1.480e-02 -1.570e-02 -7.100e-03]\n",
      " [-2.570e-02 -4.910e-02  6.080e-02 -1.470e-02 -5.800e-03]\n",
      " [-3.780e-02  9.100e-02  4.654e-01  4.100e-02 -1.100e-02]\n",
      " [-2.890e-02 -4.300e-03  1.223e-01 -2.000e-04 -2.600e-03]]\n",
      "mean_state_value 0.016995507020193017\n",
      "episode 334/600\n",
      "p1 0.8680000000000002 p0 0.03299999999999994\n",
      "trajectorySteps 85\n",
      "[[ 3  1 35 32  0]\n",
      " [ 2  0  0  1  0]\n",
      " [ 2  0  0  2  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.110e-02 -3.020e-02 -2.080e-02 -7.800e-03 -8.400e-03]\n",
      " [-3.880e-02 -1.040e-02 -1.470e-02 -1.560e-02 -7.100e-03]\n",
      " [-2.570e-02 -4.890e-02  6.090e-02 -1.460e-02 -5.800e-03]\n",
      " [-3.760e-02  9.110e-02  4.677e-01  4.110e-02 -1.100e-02]\n",
      " [-2.880e-02 -4.200e-03  1.236e-01 -1.000e-04 -2.600e-03]]\n",
      "mean_state_value 0.017214769694660282\n",
      "episode 335/600\n",
      "p1 0.8688000000000002 p0 0.03279999999999994\n",
      "trajectorySteps 21\n",
      "[[3 1 1 0 0]\n",
      " [3 0 1 1 0]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.10e-02 -3.05e-02 -2.10e-02 -7.80e-03 -8.40e-03]\n",
      " [-3.86e-02 -1.03e-02 -1.46e-02 -1.55e-02 -7.10e-03]\n",
      " [-2.56e-02 -4.86e-02  6.10e-02 -1.45e-02 -5.80e-03]\n",
      " [-3.74e-02  9.12e-02  4.70e-01  4.11e-02 -1.09e-02]\n",
      " [-2.87e-02 -4.20e-03  1.25e-01  1.00e-04 -2.50e-03]]\n",
      "mean_state_value 0.01741527427764628\n",
      "episode 336/600\n",
      "p1 0.8696000000000003 p0 0.03259999999999994\n",
      "trajectorySteps 20\n",
      "[[2 1 1 1 1]\n",
      " [3 1 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.110e-02 -3.040e-02 -2.080e-02 -7.700e-03 -8.400e-03]\n",
      " [-3.870e-02 -1.030e-02 -1.450e-02 -1.540e-02 -7.000e-03]\n",
      " [-2.560e-02 -4.840e-02  6.110e-02 -1.440e-02 -5.700e-03]\n",
      " [-3.720e-02  9.140e-02  4.723e-01  4.120e-02 -1.090e-02]\n",
      " [-2.860e-02 -4.200e-03  1.264e-01  2.000e-04 -2.500e-03]]\n",
      "mean_state_value 0.017627923251637687\n",
      "episode 337/600\n",
      "p1 0.8704000000000003 p0 0.03239999999999994\n",
      "trajectorySteps 89\n",
      "[[68  1  1  0  0]\n",
      " [ 5  0  1  1  0]\n",
      " [ 1  0  0  1  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  1  2  1  2]]\n",
      "🔄⬅️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.150e-02 -3.030e-02 -2.100e-02 -7.700e-03 -8.300e-03]\n",
      " [-3.860e-02 -1.020e-02 -1.450e-02 -1.530e-02 -7.000e-03]\n",
      " [-2.550e-02 -4.810e-02  6.120e-02 -1.430e-02 -5.700e-03]\n",
      " [-3.700e-02  9.150e-02  4.746e-01  4.120e-02 -1.080e-02]\n",
      " [-2.840e-02 -4.000e-03  1.274e-01  2.000e-04 -2.500e-03]]\n",
      "mean_state_value 0.01780441228031698\n",
      "episode 338/600\n",
      "p1 0.8712000000000002 p0 0.03219999999999994\n",
      "trajectorySteps 95\n",
      "[[34  2  1  1  2]\n",
      " [14  0  0  0  2]\n",
      " [14  1  0  0  9]\n",
      " [ 1  0  2  1 11]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.160e-02 -3.010e-02 -2.090e-02 -7.700e-03 -8.300e-03]\n",
      " [-3.850e-02 -1.020e-02 -1.440e-02 -1.520e-02 -7.000e-03]\n",
      " [-2.530e-02 -4.790e-02  6.130e-02 -1.430e-02 -5.700e-03]\n",
      " [-3.690e-02  9.160e-02  4.770e-01  4.260e-02 -1.110e-02]\n",
      " [-2.830e-02 -4.000e-03  1.275e-01  2.000e-04 -2.500e-03]]\n",
      "mean_state_value 0.018018629903441198\n",
      "episode 339/600\n",
      "p1 0.8720000000000002 p0 0.03199999999999994\n",
      "trajectorySteps 134\n",
      "[[52 50  1  1  1]\n",
      " [ 1  3  0  0  2]\n",
      " [ 2  0  0  2 16]\n",
      " [ 0  0  2  1  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.190e-02 -3.130e-02 -2.080e-02 -7.600e-03 -8.200e-03]\n",
      " [-3.830e-02 -1.020e-02 -1.430e-02 -1.520e-02 -6.900e-03]\n",
      " [-2.530e-02 -4.760e-02  6.130e-02 -1.450e-02 -5.700e-03]\n",
      " [-3.670e-02  9.170e-02  4.793e-01  4.400e-02 -1.100e-02]\n",
      " [-2.820e-02 -3.900e-03  1.277e-01  2.000e-04 -2.500e-03]]\n",
      "mean_state_value 0.018162488685919652\n",
      "episode 340/600\n",
      "p1 0.8728000000000002 p0 0.03179999999999994\n",
      "trajectorySteps 710\n",
      "[[  0   1   1   1   1]\n",
      " [  7   1   0   2  18]\n",
      " [  7   0   0  22 622]\n",
      " [  3   0   2   0  19]\n",
      " [  0   0   1   1   1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬇️⬆️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.190e-02 -3.110e-02 -2.070e-02 -7.600e-03 -8.200e-03]\n",
      " [-3.850e-02 -1.010e-02 -1.420e-02 -1.510e-02 -7.000e-03]\n",
      " [-2.550e-02 -4.730e-02  6.140e-02 -1.450e-02 -6.600e-03]\n",
      " [-3.650e-02  9.190e-02  4.816e-01  4.400e-02 -1.090e-02]\n",
      " [-2.810e-02 -3.900e-03  1.291e-01  4.000e-04 -2.500e-03]]\n",
      "mean_state_value 0.018330713413622853\n",
      "episode 341/600\n",
      "p1 0.8736000000000003 p0 0.03159999999999994\n",
      "trajectorySteps 89\n",
      "[[ 0  1  1  2 10]\n",
      " [ 1  1  0 11  9]\n",
      " [19 21  1 10  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️🔄⬆️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.180e-02 -3.100e-02 -2.060e-02 -7.600e-03 -8.200e-03]\n",
      " [-3.830e-02 -1.010e-02 -1.410e-02 -1.510e-02 -6.900e-03]\n",
      " [-2.520e-02 -4.760e-02  6.280e-02 -1.470e-02 -6.600e-03]\n",
      " [-3.630e-02  9.200e-02  4.839e-01  4.410e-02 -1.090e-02]\n",
      " [-2.790e-02 -3.800e-03  1.292e-01  4.000e-04 -2.500e-03]]\n",
      "mean_state_value 0.018531679894385134\n",
      "episode 342/600\n",
      "p1 0.8744000000000003 p0 0.03139999999999994\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 5 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️🔄⬆️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.170e-02 -3.080e-02 -2.050e-02 -7.600e-03 -8.100e-03]\n",
      " [-3.810e-02 -1.000e-02 -1.400e-02 -1.500e-02 -6.900e-03]\n",
      " [-2.550e-02 -4.760e-02  6.290e-02 -1.460e-02 -6.500e-03]\n",
      " [-3.610e-02  9.340e-02  4.862e-01  4.410e-02 -1.080e-02]\n",
      " [-2.780e-02 -3.800e-03  1.293e-01  4.000e-04 -2.500e-03]]\n",
      "mean_state_value 0.01873359207102645\n",
      "episode 343/600\n",
      "p1 0.8752000000000002 p0 0.03119999999999994\n",
      "trajectorySteps 187\n",
      "[[  0   0   0   0   0]\n",
      " [  1   0   0   0   0]\n",
      " [170   7   1   0   0]\n",
      " [  6   0   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0217 -0.0307 -0.0204 -0.0075 -0.0081]\n",
      " [-0.0379 -0.01   -0.0139 -0.0149 -0.0069]\n",
      " [-0.0256 -0.0477  0.0643 -0.0145 -0.0065]\n",
      " [-0.036   0.0936  0.4886  0.0442 -0.0107]\n",
      " [-0.0277 -0.0038  0.1295  0.0005 -0.0025]]\n",
      "mean_state_value 0.01894246970095963\n",
      "episode 344/600\n",
      "p1 0.8760000000000002 p0 0.030999999999999937\n",
      "trajectorySteps 76\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [34  0  0  0  0]\n",
      " [38  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️🔄⬆️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0216 -0.0305 -0.0202 -0.0075 -0.0081]\n",
      " [-0.0377 -0.0099 -0.0139 -0.0148 -0.0069]\n",
      " [-0.0257 -0.0475  0.0644 -0.0144 -0.0065]\n",
      " [-0.0365  0.095   0.4909  0.0442 -0.0107]\n",
      " [-0.0276 -0.0037  0.1296  0.0005 -0.0024]]\n",
      "mean_state_value 0.019143170598790955\n",
      "episode 345/600\n",
      "p1 0.8768000000000002 p0 0.03079999999999994\n",
      "trajectorySteps 206\n",
      "[[53  2  3  4 28]\n",
      " [ 4  2  4 68 25]\n",
      " [ 3  0  0  2  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬆️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0219 -0.0304 -0.0202 -0.0075 -0.0081]\n",
      " [-0.0378 -0.0099 -0.0141 -0.016  -0.0069]\n",
      " [-0.0256 -0.0472  0.0645 -0.0144 -0.0065]\n",
      " [-0.0363  0.0951  0.4932  0.0443 -0.0106]\n",
      " [-0.0274 -0.0037  0.131   0.0006 -0.0024]]\n",
      "mean_state_value 0.019275508152488647\n",
      "episode 346/600\n",
      "p1 0.8776000000000003 p0 0.03059999999999994\n",
      "trajectorySteps 25\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [10  0  0  0  0]\n",
      " [10  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬇️⏫️⏩️⬆️⬇️\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0219 -0.0302 -0.02   -0.0075 -0.0081]\n",
      " [-0.0376 -0.0098 -0.014  -0.0159 -0.0069]\n",
      " [-0.0256 -0.0469  0.0646 -0.0143 -0.0064]\n",
      " [-0.0368  0.0965  0.4956  0.0443 -0.0105]\n",
      " [-0.0273 -0.0036  0.1312  0.0007 -0.0024]]\n",
      "mean_state_value 0.01947712498575091\n",
      "episode 347/600\n",
      "p1 0.8784000000000003 p0 0.030399999999999937\n",
      "trajectorySteps 358\n",
      "[[ 22   0   0   0   0]\n",
      " [ 19   0   0   0   0]\n",
      " [291   4   0   0   0]\n",
      " [  8   1   2   0   0]\n",
      " [ 11   0   0   0   0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏫️⏩️⬆️⬇️\n",
      "⬆️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0301 -0.0199 -0.0075 -0.008 ]\n",
      " [-0.0376 -0.0098 -0.0139 -0.0158 -0.0068]\n",
      " [-0.026  -0.0467  0.0647 -0.0142 -0.0064]\n",
      " [-0.0369  0.0979  0.4979  0.0444 -0.0105]\n",
      " [-0.0272 -0.0036  0.1313  0.0007 -0.0024]]\n",
      "mean_state_value 0.01965952067957669\n",
      "episode 348/600\n",
      "p1 0.8792000000000002 p0 0.03019999999999994\n",
      "trajectorySteps 26\n",
      "[[6 1 1 1 2]\n",
      " [1 0 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 2 3 1]]\n",
      "🔄🔄➡️⬅️⬇️\n",
      "⬆️⏫️⏩️⬆️⬇️\n",
      "⬆️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.03   -0.0201 -0.0075 -0.008 ]\n",
      " [-0.0374 -0.0097 -0.0138 -0.0157 -0.0068]\n",
      " [-0.0259 -0.0464  0.0648 -0.0141 -0.0064]\n",
      " [-0.0367  0.0981  0.5003  0.0444 -0.0104]\n",
      " [-0.0271 -0.0036  0.1327  0.0007 -0.0024]]\n",
      "mean_state_value 0.01986923917044411\n",
      "episode 349/600\n",
      "p1 0.8800000000000002 p0 0.029999999999999936\n",
      "trajectorySteps 703\n",
      "[[400 150  57  57   1]\n",
      " [ 21   5   0   1   1]\n",
      " [  3   0   0   0   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "⬇️⬅️➡️⬅️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0231 -0.0316 -0.0201 -0.0075 -0.008 ]\n",
      " [-0.0373 -0.0097 -0.0137 -0.0157 -0.0068]\n",
      " [-0.0259 -0.0462  0.0649 -0.0141 -0.0063]\n",
      " [-0.0365  0.0982  0.5026  0.0445 -0.0103]\n",
      " [-0.0269 -0.0035  0.1341  0.0008 -0.0024]]\n",
      "mean_state_value 0.019987108336076146\n",
      "episode 350/600\n",
      "p1 0.8808000000000002 p0 0.029799999999999938\n",
      "trajectorySteps 59\n",
      "[[ 1  0  0  0  0]\n",
      " [ 7  0  0  0  0]\n",
      " [ 8  2  0  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [36  1  1  0  0]]\n",
      "⬇️⬅️➡️⬅️⬇️\n",
      "🔄⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0231 -0.0314 -0.0199 -0.0074 -0.0079]\n",
      " [-0.0371 -0.0097 -0.0137 -0.0156 -0.0068]\n",
      " [-0.0259 -0.0459  0.065  -0.014  -0.0063]\n",
      " [-0.0363  0.0984  0.505   0.0445 -0.0103]\n",
      " [-0.0271 -0.0033  0.1355  0.0009 -0.0024]]\n",
      "mean_state_value 0.020208099512410264\n",
      "episode 351/600\n",
      "p1 0.8816000000000003 p0 0.02959999999999994\n",
      "trajectorySteps 1339\n",
      "[[  65   28    4    5    0]\n",
      " [1140   26    0    2    0]\n",
      " [  41    1    0   24    0]\n",
      " [   0    0    2    1    0]\n",
      " [   0    0    0    0    0]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0234 -0.0317 -0.0198 -0.0074 -0.0079]\n",
      " [-0.0455 -0.0098 -0.0136 -0.0155 -0.0067]\n",
      " [-0.0259 -0.0457  0.0651 -0.0143 -0.0063]\n",
      " [-0.0361  0.0985  0.5073  0.046  -0.0102]\n",
      " [-0.027  -0.0033  0.1356  0.0009 -0.0023]]\n",
      "mean_state_value 0.020034957266326314\n",
      "episode 352/600\n",
      "p1 0.8824000000000003 p0 0.029399999999999937\n",
      "trajectorySteps 31\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  2  1  0  0]\n",
      " [12  0  2  0  0]\n",
      " [10  0  0  0  0]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0234 -0.0316 -0.0197 -0.0074 -0.0078]\n",
      " [-0.0453 -0.0097 -0.0135 -0.0154 -0.0067]\n",
      " [-0.026  -0.0457  0.0658 -0.0142 -0.0063]\n",
      " [-0.036   0.0986  0.5085  0.046  -0.0101]\n",
      " [-0.027  -0.0033  0.1358  0.0009 -0.0023]]\n",
      "mean_state_value 0.02017501466791556\n",
      "episode 353/600\n",
      "p1 0.8832000000000002 p0 0.029199999999999938\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 0 2 0 0]\n",
      " [2 1 1 0 0]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0233 -0.0314 -0.0196 -0.0074 -0.0078]\n",
      " [-0.045  -0.0097 -0.0134 -0.0153 -0.0067]\n",
      " [-0.0259 -0.0454  0.0659 -0.0141 -0.0062]\n",
      " [-0.0358  0.0988  0.5109  0.0461 -0.0101]\n",
      " [-0.0271 -0.0031  0.1372  0.001  -0.0023]]\n",
      "mean_state_value 0.02040270098708961\n",
      "episode 354/600\n",
      "p1 0.8840000000000002 p0 0.028999999999999936\n",
      "trajectorySteps 36\n",
      "[[ 1  0  0  0  0]\n",
      " [13  1  0  0  0]\n",
      " [15  2  1  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0232 -0.0312 -0.0195 -0.0073 -0.0078]\n",
      " [-0.0451 -0.0097 -0.0133 -0.0152 -0.0067]\n",
      " [-0.0262 -0.0454  0.0673 -0.0141 -0.0062]\n",
      " [-0.0357  0.0989  0.5132  0.0461 -0.01  ]\n",
      " [-0.027  -0.003   0.1373  0.001  -0.0023]]\n",
      "mean_state_value 0.02060174656952769\n",
      "episode 355/600\n",
      "p1 0.8848000000000003 p0 0.028799999999999937\n",
      "trajectorySteps 25\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [12 10  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0232 -0.0311 -0.0194 -0.0073 -0.0077]\n",
      " [-0.0449 -0.0096 -0.0132 -0.0151 -0.0066]\n",
      " [-0.0265 -0.0455  0.0688 -0.014  -0.0062]\n",
      " [-0.0355  0.099   0.5149  0.0462 -0.01  ]\n",
      " [-0.0269 -0.003   0.1375  0.001  -0.0023]]\n",
      "mean_state_value 0.020779717735191276\n",
      "episode 356/600\n",
      "p1 0.8856000000000003 p0 0.028599999999999938\n",
      "trajectorySteps 38\n",
      "[[0 1 8 6 0]\n",
      " [1 1 1 1 0]\n",
      " [2 7 0 1 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 2 1 1]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0231 -0.0309 -0.0195 -0.0073 -0.0077]\n",
      " [-0.0446 -0.0096 -0.0131 -0.015  -0.0066]\n",
      " [-0.0265 -0.0458  0.0689 -0.0139 -0.0062]\n",
      " [-0.0353  0.0992  0.5172  0.0462 -0.0099]\n",
      " [-0.0267 -0.003   0.1389  0.001  -0.0023]]\n",
      "mean_state_value 0.0209811321614139\n",
      "episode 357/600\n",
      "p1 0.8864000000000003 p0 0.028399999999999936\n",
      "trajectorySteps 93\n",
      "[[ 0  1  5  5  0]\n",
      " [ 1  2  0  2  0]\n",
      " [34 33  0  1  1]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.023  -0.0307 -0.0194 -0.0072 -0.0076]\n",
      " [-0.0444 -0.0095 -0.013  -0.0149 -0.0066]\n",
      " [-0.0266 -0.0461  0.069  -0.0138 -0.0062]\n",
      " [-0.0351  0.0993  0.5196  0.0463 -0.0098]\n",
      " [-0.0266 -0.0029  0.1403  0.0012 -0.0023]]\n",
      "mean_state_value 0.02118859428843096\n",
      "episode 358/600\n",
      "p1 0.8872000000000002 p0 0.028199999999999937\n",
      "trajectorySteps 84\n",
      "[[ 0  1 13 13  2]\n",
      " [20  1  0  0  1]\n",
      " [23  0  0  0  1]\n",
      " [ 2  0  2  0  2]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️⬅️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "➡️🔄⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.023  -0.0306 -0.0193 -0.0073 -0.0076]\n",
      " [-0.0445 -0.0095 -0.013  -0.0148 -0.0066]\n",
      " [-0.0267 -0.0459  0.0691 -0.0137 -0.0061]\n",
      " [-0.0349  0.0995  0.522   0.0463 -0.0098]\n",
      " [-0.0265 -0.0029  0.1417  0.0014 -0.0022]]\n",
      "mean_state_value 0.021407986478206995\n",
      "episode 359/600\n",
      "p1 0.8880000000000002 p0 0.027999999999999935\n",
      "trajectorySteps 92\n",
      "[[ 0  2 25 24  1]\n",
      " [ 0  2  0  0  1]\n",
      " [ 6 19  1  3  2]\n",
      " [ 3  0  2  1  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.0306 -0.0193 -0.0073 -0.0076]\n",
      " [-0.0442 -0.0099 -0.0129 -0.0147 -0.0065]\n",
      " [-0.0266 -0.0464  0.0692 -0.0139 -0.0061]\n",
      " [-0.0348  0.0996  0.5244  0.0478 -0.0097]\n",
      " [-0.0263 -0.0028  0.1419  0.0014 -0.0022]]\n",
      "mean_state_value 0.021573867327401887\n",
      "episode 360/600\n",
      "p1 0.8888000000000003 p0 0.027799999999999936\n",
      "trajectorySteps 19\n",
      "[[0 0 0 0 0]\n",
      " [0 0 1 1 0]\n",
      " [3 1 1 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 1 2 2 2]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0305 -0.0192 -0.0072 -0.0075]\n",
      " [-0.044  -0.0098 -0.0128 -0.0146 -0.0065]\n",
      " [-0.027  -0.0464  0.069  -0.0138 -0.0061]\n",
      " [-0.0346  0.0997  0.526   0.0478 -0.0097]\n",
      " [-0.0262 -0.0027  0.1431  0.0013 -0.0022]]\n",
      "mean_state_value 0.021730272217653956\n",
      "episode 361/600\n",
      "p1 0.8896000000000003 p0 0.027599999999999937\n",
      "trajectorySteps 236\n",
      "[[25  0  2  1  1]\n",
      " [25  4  1  0  1]\n",
      " [82 85  0  0  1]\n",
      " [ 1  1  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0238 -0.0303 -0.0191 -0.0072 -0.0075]\n",
      " [-0.0443 -0.0101 -0.0127 -0.0145 -0.0065]\n",
      " [-0.0272 -0.0481  0.0691 -0.0137 -0.0061]\n",
      " [-0.0344  0.0999  0.5284  0.0479 -0.0096]\n",
      " [-0.0261 -0.0026  0.1445  0.0015 -0.0022]]\n",
      "mean_state_value 0.021812508121365988\n",
      "episode 362/600\n",
      "p1 0.8904000000000003 p0 0.027399999999999935\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0237 -0.0301 -0.0189 -0.0072 -0.0075]\n",
      " [-0.0441 -0.01   -0.0126 -0.0144 -0.0064]\n",
      " [-0.0273 -0.0478  0.0692 -0.0136 -0.0061]\n",
      " [-0.0345  0.1013  0.5308  0.0479 -0.0096]\n",
      " [-0.026  -0.0026  0.1447  0.0015 -0.0022]]\n",
      "mean_state_value 0.02203783946364046\n",
      "episode 363/600\n",
      "p1 0.8912000000000002 p0 0.027199999999999936\n",
      "trajectorySteps 55\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [13 37  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0236 -0.03   -0.0188 -0.0071 -0.0074]\n",
      " [-0.0438 -0.01   -0.0125 -0.0143 -0.0064]\n",
      " [-0.0272 -0.0479  0.0693 -0.0135 -0.006 ]\n",
      " [-0.0343  0.1028  0.5332  0.048  -0.0095]\n",
      " [-0.0258 -0.0025  0.1448  0.0015 -0.0022]]\n",
      "mean_state_value 0.02226408955690212\n",
      "episode 364/600\n",
      "p1 0.8920000000000002 p0 0.026999999999999934\n",
      "trajectorySteps 69\n",
      "[[ 1  1  1  1  1]\n",
      " [ 1  0  0  0  1]\n",
      " [52  1  0  0  1]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0236 -0.0298 -0.0187 -0.0071 -0.0074]\n",
      " [-0.0436 -0.0099 -0.0124 -0.0142 -0.0064]\n",
      " [-0.0272 -0.0476  0.0694 -0.0134 -0.006 ]\n",
      " [-0.0341  0.1029  0.5356  0.048  -0.0094]\n",
      " [-0.0257 -0.0025  0.1463  0.0017 -0.0022]]\n",
      "mean_state_value 0.022507960904061278\n",
      "episode 365/600\n",
      "p1 0.8928000000000003 p0 0.026799999999999935\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0235 -0.0296 -0.0186 -0.0071 -0.0073]\n",
      " [-0.0433 -0.0099 -0.0123 -0.0141 -0.0064]\n",
      " [-0.0272 -0.0476  0.0695 -0.0133 -0.006 ]\n",
      " [-0.0339  0.1044  0.538   0.0481 -0.0094]\n",
      " [-0.0256 -0.0024  0.1464  0.0017 -0.0022]]\n",
      "mean_state_value 0.022736373022359112\n",
      "episode 366/600\n",
      "p1 0.8936000000000003 p0 0.026599999999999936\n",
      "trajectorySteps 18\n",
      "[[1 1 2 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0234 -0.0294 -0.0185 -0.0071 -0.0073]\n",
      " [-0.0431 -0.0098 -0.0123 -0.014  -0.0063]\n",
      " [-0.0271 -0.0473  0.0696 -0.0133 -0.006 ]\n",
      " [-0.0337  0.1046  0.5404  0.0481 -0.0093]\n",
      " [-0.0254 -0.0024  0.1479  0.0019 -0.0021]]\n",
      "mean_state_value 0.022983888424264083\n",
      "episode 367/600\n",
      "p1 0.8944000000000003 p0 0.026399999999999934\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0233 -0.0293 -0.0183 -0.007  -0.0073]\n",
      " [-0.0428 -0.0098 -0.0122 -0.0139 -0.0063]\n",
      " [-0.0272 -0.047   0.0697 -0.0132 -0.006 ]\n",
      " [-0.0335  0.1047  0.5428  0.0482 -0.0093]\n",
      " [-0.0253 -0.0024  0.1493  0.0021 -0.0021]]\n",
      "mean_state_value 0.023223514090623794\n",
      "episode 368/600\n",
      "p1 0.8952000000000002 p0 0.026199999999999935\n",
      "trajectorySteps 90\n",
      "[[ 1  1  1  1  1]\n",
      " [36  0  0  0  1]\n",
      " [38  2  0  0  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0233 -0.0291 -0.0182 -0.007  -0.0072]\n",
      " [-0.0426 -0.0097 -0.0121 -0.0138 -0.0063]\n",
      " [-0.0269 -0.0467  0.0698 -0.0131 -0.0059]\n",
      " [-0.0334  0.1048  0.5452  0.0482 -0.0092]\n",
      " [-0.0252 -0.0023  0.1508  0.0023 -0.0021]]\n",
      "mean_state_value 0.023477548657893844\n",
      "episode 369/600\n",
      "p1 0.8960000000000002 p0 0.025999999999999933\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0232 -0.0289 -0.0181 -0.007  -0.0072]\n",
      " [-0.0423 -0.0097 -0.012  -0.0137 -0.0063]\n",
      " [-0.0269 -0.0464  0.0699 -0.013  -0.0059]\n",
      " [-0.0332  0.105   0.5476  0.0483 -0.0091]\n",
      " [-0.0251 -0.0023  0.1522  0.0024 -0.0021]]\n",
      "mean_state_value 0.023726024155880387\n",
      "episode 370/600\n",
      "p1 0.8968000000000003 p0 0.025799999999999934\n",
      "trajectorySteps 20\n",
      "[[2 2 1 0 0]\n",
      " [1 0 1 1 0]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️⬅️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0231 -0.0289 -0.0182 -0.0069 -0.0071]\n",
      " [-0.0421 -0.0096 -0.0119 -0.0136 -0.0062]\n",
      " [-0.0268 -0.0461  0.07   -0.0129 -0.0059]\n",
      " [-0.033   0.1051  0.55    0.0484 -0.0091]\n",
      " [-0.0249 -0.0022  0.1537  0.0025 -0.0021]]\n",
      "mean_state_value 0.02395414407431261\n",
      "episode 371/600\n",
      "p1 0.8976000000000003 p0 0.025599999999999935\n",
      "trajectorySteps 243\n",
      "[[107 102   2   1   1]\n",
      " [  7   4   0   0   2]\n",
      " [  1   0   0   1   7]\n",
      " [  0   0   2   0   3]\n",
      " [  0   0   1   1   1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0233 -0.0299 -0.0181 -0.0069 -0.0071]\n",
      " [-0.0423 -0.0096 -0.0118 -0.0135 -0.0062]\n",
      " [-0.0271 -0.0458  0.0701 -0.0128 -0.0059]\n",
      " [-0.0328  0.1053  0.5524  0.0484 -0.009 ]\n",
      " [-0.0248 -0.0022  0.1552  0.0026 -0.002 ]]\n",
      "mean_state_value 0.02410996277831058\n",
      "episode 372/600\n",
      "p1 0.8984000000000003 p0 0.025399999999999933\n",
      "trajectorySteps 73\n",
      "[[ 5  2  1  1  1]\n",
      " [ 5  0  0  0  1]\n",
      " [49  0  0  0  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "➡️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0232 -0.0298 -0.018  -0.0069 -0.0071]\n",
      " [-0.042  -0.0095 -0.0117 -0.0135 -0.0062]\n",
      " [-0.0272 -0.0455  0.0702 -0.0128 -0.0059]\n",
      " [-0.0326  0.1054  0.554   0.0485 -0.0089]\n",
      " [-0.0247 -0.0022  0.1561  0.0028 -0.002 ]]\n",
      "mean_state_value 0.02429136351908583\n",
      "episode 373/600\n",
      "p1 0.8992000000000002 p0 0.025199999999999934\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 5 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0231 -0.0296 -0.0179 -0.0069 -0.007 ]\n",
      " [-0.0418 -0.0095 -0.0117 -0.0134 -0.0061]\n",
      " [-0.0273 -0.0455  0.0717 -0.0127 -0.0058]\n",
      " [-0.0324  0.1055  0.5564  0.0485 -0.0089]\n",
      " [-0.0245 -0.0021  0.1562  0.0029 -0.002 ]]\n",
      "mean_state_value 0.024523877203069935\n",
      "episode 374/600\n",
      "p1 0.9000000000000002 p0 0.024999999999999932\n",
      "trajectorySteps 176\n",
      "[[ 22   1   1   1   1]\n",
      " [ 25   1   0   0   2]\n",
      " [105   3   0   0   2]\n",
      " [  5   0   2   0   1]\n",
      " [  1   0   1   1   1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0294 -0.0178 -0.0068 -0.007 ]\n",
      " [-0.0418 -0.0095 -0.0116 -0.0133 -0.0061]\n",
      " [-0.0273 -0.0452  0.0718 -0.0126 -0.0058]\n",
      " [-0.0323  0.1057  0.5588  0.0486 -0.0088]\n",
      " [-0.0244 -0.0021  0.1577  0.003  -0.002 ]]\n",
      "mean_state_value 0.024709223954788747\n",
      "episode 375/600\n",
      "p1 0.9008000000000003 p0 0.024799999999999933\n",
      "trajectorySteps 26\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 9  1  0  0  0]\n",
      " [10  1  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "➡️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0292 -0.0177 -0.0068 -0.0069]\n",
      " [-0.0416 -0.0094 -0.0115 -0.0132 -0.0061]\n",
      " [-0.0276 -0.045   0.0719 -0.0125 -0.0058]\n",
      " [-0.0324  0.1072  0.5612  0.0486 -0.0087]\n",
      " [-0.0243 -0.002   0.1579  0.0031 -0.002 ]]\n",
      "mean_state_value 0.024928535691204904\n",
      "episode 376/600\n",
      "p1 0.9016000000000003 p0 0.024599999999999934\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 4 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.024  -0.029  -0.0175 -0.0068 -0.0069]\n",
      " [-0.0413 -0.0094 -0.0114 -0.0131 -0.0061]\n",
      " [-0.0276 -0.0449  0.0734 -0.0124 -0.0058]\n",
      " [-0.0322  0.1073  0.5637  0.0487 -0.0087]\n",
      " [-0.0242 -0.002   0.158   0.0031 -0.002 ]]\n",
      "mean_state_value 0.025161951228209252\n",
      "episode 377/600\n",
      "p1 0.9024000000000003 p0 0.024399999999999932\n",
      "trajectorySteps 279\n",
      "[[  4   0   0   0   0]\n",
      " [124   1   0   0   0]\n",
      " [137   3   1   0   0]\n",
      " [  7   0   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.024  -0.0289 -0.0174 -0.0068 -0.0069]\n",
      " [-0.0416 -0.0093 -0.0113 -0.013  -0.006 ]\n",
      " [-0.0277 -0.0449  0.0749 -0.0123 -0.0057]\n",
      " [-0.0321  0.1075  0.5661  0.0487 -0.0086]\n",
      " [-0.024  -0.002   0.1582  0.0031 -0.0019]]\n",
      "mean_state_value 0.02536792341503408\n",
      "episode 378/600\n",
      "p1 0.9032000000000002 p0 0.024199999999999933\n",
      "trajectorySteps 34\n",
      "[[10  1  1  1  1]\n",
      " [11  0  0  0  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.024  -0.0287 -0.0173 -0.0067 -0.0068]\n",
      " [-0.0414 -0.0093 -0.0112 -0.0129 -0.006 ]\n",
      " [-0.0276 -0.0446  0.075  -0.0122 -0.0057]\n",
      " [-0.0319  0.1076  0.5678  0.0488 -0.0085]\n",
      " [-0.0239 -0.0019  0.1597  0.0033 -0.0019]]\n",
      "mean_state_value 0.02558888635305586\n",
      "episode 379/600\n",
      "p1 0.9040000000000002 p0 0.02399999999999993\n",
      "trajectorySteps 20\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [14  3  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0239 -0.0285 -0.0172 -0.0067 -0.0068]\n",
      " [-0.0411 -0.0092 -0.0111 -0.0128 -0.006 ]\n",
      " [-0.0276 -0.0445  0.0765 -0.0121 -0.0057]\n",
      " [-0.0317  0.1077  0.5703  0.0488 -0.0085]\n",
      " [-0.0238 -0.0019  0.1599  0.0033 -0.0019]]\n",
      "mean_state_value 0.02582658833152536\n",
      "episode 380/600\n",
      "p1 0.9048000000000003 p0 0.023799999999999932\n",
      "trajectorySteps 217\n",
      "[[ 21   1   1   1   1]\n",
      " [ 25   0   0   0   1]\n",
      " [155   0   0   0   1]\n",
      " [  2   0   2   0   1]\n",
      " [  0   0   1   2   2]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0239 -0.0283 -0.017  -0.0067 -0.0067]\n",
      " [-0.041  -0.0092 -0.011  -0.0127 -0.006 ]\n",
      " [-0.0277 -0.0442  0.0766 -0.012  -0.0057]\n",
      " [-0.0315  0.1079  0.5727  0.0489 -0.0084]\n",
      " [-0.0236 -0.0018  0.1614  0.0035 -0.0019]]\n",
      "mean_state_value 0.02606337337311727\n",
      "episode 381/600\n",
      "p1 0.9056000000000003 p0 0.023599999999999934\n",
      "trajectorySteps 120\n",
      "[[49  1  1  1  1]\n",
      " [50  2  0  0  1]\n",
      " [ 5  2  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0282 -0.0169 -0.0066 -0.0067]\n",
      " [-0.0416 -0.0091 -0.011  -0.0126 -0.0059]\n",
      " [-0.0279 -0.0439  0.0767 -0.0119 -0.0056]\n",
      " [-0.0313  0.108   0.5752  0.0489 -0.0083]\n",
      " [-0.0235 -0.0018  0.1628  0.0037 -0.0019]]\n",
      "mean_state_value 0.026252422438574854\n",
      "episode 382/600\n",
      "p1 0.9064000000000003 p0 0.02339999999999993\n",
      "trajectorySteps 16\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.028  -0.0168 -0.0066 -0.0067]\n",
      " [-0.0413 -0.0091 -0.0109 -0.0125 -0.0059]\n",
      " [-0.0279 -0.0436  0.0768 -0.0118 -0.0056]\n",
      " [-0.0311  0.1082  0.5776  0.049  -0.0083]\n",
      " [-0.0234 -0.0018  0.1643  0.0039 -0.0019]]\n",
      "mean_state_value 0.026505452922070308\n",
      "episode 383/600\n",
      "p1 0.9072000000000002 p0 0.023199999999999932\n",
      "trajectorySteps 87\n",
      "[[ 2  2  1  1  1]\n",
      " [ 2  1  0  0  1]\n",
      " [31  0  0  0  2]\n",
      " [33  0  2  0  2]\n",
      " [ 3  0  1  1  1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.028  -0.0167 -0.0066 -0.0066]\n",
      " [-0.041  -0.009  -0.0108 -0.0124 -0.0059]\n",
      " [-0.0276 -0.0433  0.0769 -0.0118 -0.0056]\n",
      " [-0.031   0.1083  0.5801  0.0491 -0.0082]\n",
      " [-0.0233 -0.0017  0.1658  0.0041 -0.0018]]\n",
      "mean_state_value 0.026735059926867276\n",
      "episode 384/600\n",
      "p1 0.9080000000000003 p0 0.02299999999999993\n",
      "trajectorySteps 19\n",
      "[[1 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [6 1 2 0 0]\n",
      " [4 0 0 0 0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.0278 -0.0166 -0.0066 -0.0066]\n",
      " [-0.0408 -0.009  -0.0107 -0.0123 -0.0059]\n",
      " [-0.0275 -0.043   0.077  -0.0117 -0.0056]\n",
      " [-0.0311  0.1093  0.5816  0.0491 -0.0082]\n",
      " [-0.0235 -0.0017  0.166   0.0041 -0.0018]]\n",
      "mean_state_value 0.026902170332165815\n",
      "episode 385/600\n",
      "p1 0.9088000000000003 p0 0.02279999999999993\n",
      "trajectorySteps 103\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [95  1  1  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0244 -0.0277 -0.0164 -0.0065 -0.0065]\n",
      " [-0.0405 -0.0089 -0.0106 -0.0122 -0.0058]\n",
      " [-0.0274 -0.0427  0.0771 -0.0116 -0.0055]\n",
      " [-0.0309  0.1094  0.5841  0.0492 -0.0081]\n",
      " [-0.0242 -0.0015  0.1675  0.0042 -0.0018]]\n",
      "mean_state_value 0.02712385699805299\n",
      "episode 386/600\n",
      "p1 0.9096000000000003 p0 0.022599999999999933\n",
      "trajectorySteps 67\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [29  0  2  0  0]\n",
      " [33  1  1  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0243 -0.0275 -0.0163 -0.0065 -0.0065]\n",
      " [-0.0402 -0.0089 -0.0105 -0.0121 -0.0058]\n",
      " [-0.0274 -0.0424  0.0772 -0.0115 -0.0055]\n",
      " [-0.0307  0.1095  0.5865  0.0492 -0.008 ]\n",
      " [-0.0244 -0.0013  0.169   0.0042 -0.0018]]\n",
      "mean_state_value 0.027363166323242788\n",
      "episode 387/600\n",
      "p1 0.9104000000000003 p0 0.02239999999999993\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0243 -0.0273 -0.0162 -0.0065 -0.0065]\n",
      " [-0.04   -0.0088 -0.0104 -0.012  -0.0058]\n",
      " [-0.0273 -0.0421  0.0773 -0.0114 -0.0055]\n",
      " [-0.0305  0.1097  0.589   0.0493 -0.008 ]\n",
      " [-0.0242 -0.0012  0.1705  0.0044 -0.0018]]\n",
      "mean_state_value 0.027617677123081604\n",
      "episode 388/600\n",
      "p1 0.9112000000000002 p0 0.02219999999999993\n",
      "trajectorySteps 50\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [43  1  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0271 -0.0161 -0.0065 -0.0064]\n",
      " [-0.0397 -0.0088 -0.0103 -0.0119 -0.0057]\n",
      " [-0.0273 -0.0418  0.0775 -0.0113 -0.0055]\n",
      " [-0.0308  0.1112  0.5915  0.0493 -0.0079]\n",
      " [-0.0241 -0.0012  0.1707  0.0044 -0.0018]]\n",
      "mean_state_value 0.02784954500651661\n",
      "episode 389/600\n",
      "p1 0.9120000000000003 p0 0.02199999999999993\n",
      "trajectorySteps 34\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  1  0  0  0]\n",
      " [14  0  2  0  0]\n",
      " [13  1  1  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0269 -0.016  -0.0064 -0.0064]\n",
      " [-0.0394 -0.0087 -0.0103 -0.0118 -0.0057]\n",
      " [-0.0272 -0.0415  0.0776 -0.0112 -0.0054]\n",
      " [-0.0306  0.1114  0.594   0.0494 -0.0078]\n",
      " [-0.0242 -0.001   0.1722  0.0044 -0.0017]]\n",
      "mean_state_value 0.02809407266693101\n",
      "episode 390/600\n",
      "p1 0.9128000000000003 p0 0.02179999999999993\n",
      "trajectorySteps 30\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [14  1  0  0  0]\n",
      " [12  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0268 -0.0158 -0.0064 -0.0063]\n",
      " [-0.0392 -0.0087 -0.0102 -0.0117 -0.0057]\n",
      " [-0.027  -0.0415  0.0777 -0.0111 -0.0054]\n",
      " [-0.0308  0.1129  0.5958  0.0494 -0.0078]\n",
      " [-0.024  -0.0009  0.1724  0.0045 -0.0017]]\n",
      "mean_state_value 0.02830158738263287\n",
      "episode 391/600\n",
      "p1 0.9136000000000003 p0 0.021599999999999932\n",
      "trajectorySteps 256\n",
      "[[29  2  1  1  1]\n",
      " [32  3  0  0  1]\n",
      " [91  2  0  0  1]\n",
      " [82  0  2  0  1]\n",
      " [ 2  0  2  2  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0243 -0.0266 -0.0157 -0.0064 -0.0063]\n",
      " [-0.04   -0.0086 -0.0101 -0.0116 -0.0057]\n",
      " [-0.0262 -0.0412  0.0778 -0.011  -0.0054]\n",
      " [-0.0307  0.1131  0.5972  0.0495 -0.0077]\n",
      " [-0.0239 -0.0009  0.1731  0.0045 -0.0017]]\n",
      "mean_state_value 0.02843940290219009\n",
      "episode 392/600\n",
      "p1 0.9144000000000003 p0 0.02139999999999993\n",
      "trajectorySteps 44\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  1  0  0  0]\n",
      " [18  0  2  0  0]\n",
      " [19  1  1  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0243 -0.0264 -0.0156 -0.0064 -0.0063]\n",
      " [-0.0397 -0.0086 -0.01   -0.0115 -0.0056]\n",
      " [-0.0262 -0.0409  0.0779 -0.0109 -0.0054]\n",
      " [-0.0308  0.1132  0.5997  0.0495 -0.0076]\n",
      " [-0.024  -0.0007  0.1746  0.0045 -0.0017]]\n",
      "mean_state_value 0.028675140223071525\n",
      "episode 393/600\n",
      "p1 0.9152000000000002 p0 0.02119999999999993\n",
      "trajectorySteps 27\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [21  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.420e-02 -2.620e-02 -1.550e-02 -6.300e-03 -6.200e-03]\n",
      " [-3.940e-02 -8.500e-03 -9.900e-03 -1.140e-02 -5.600e-03]\n",
      " [-2.610e-02 -4.060e-02  7.800e-02 -1.090e-02 -5.300e-03]\n",
      " [-3.110e-02  1.148e-01  6.021e-01  4.960e-02 -7.600e-03]\n",
      " [-2.390e-02 -6.000e-04  1.748e-01  4.600e-03 -1.700e-03]]\n",
      "mean_state_value 0.028907721359331798\n",
      "episode 394/600\n",
      "p1 0.9160000000000003 p0 0.02099999999999993\n",
      "trajectorySteps 126\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [62  1  2  0  0]\n",
      " [60  0  0  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.410e-02 -2.610e-02 -1.540e-02 -6.300e-03 -6.200e-03]\n",
      " [-3.910e-02 -8.500e-03 -9.800e-03 -1.130e-02 -5.600e-03]\n",
      " [-2.610e-02 -4.030e-02  7.810e-02 -1.080e-02 -5.300e-03]\n",
      " [-3.120e-02  1.163e-01  6.046e-01  4.970e-02 -7.500e-03]\n",
      " [-2.420e-02 -6.000e-04  1.750e-01  4.600e-03 -1.700e-03]]\n",
      "mean_state_value 0.02912960131532123\n",
      "episode 395/600\n",
      "p1 0.9168000000000003 p0 0.02079999999999993\n",
      "trajectorySteps 22\n",
      "[[6 1 1 2 2]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.410e-02 -2.590e-02 -1.520e-02 -6.300e-03 -6.100e-03]\n",
      " [-3.890e-02 -8.400e-03 -9.700e-03 -1.120e-02 -5.600e-03]\n",
      " [-2.600e-02 -4.000e-02  7.820e-02 -1.070e-02 -5.300e-03]\n",
      " [-3.100e-02  1.165e-01  6.071e-01  4.970e-02 -7.400e-03]\n",
      " [-2.400e-02 -6.000e-04  1.765e-01  4.800e-03 -1.600e-03]]\n",
      "mean_state_value 0.0293873009101296\n",
      "episode 396/600\n",
      "p1 0.9176000000000003 p0 0.02059999999999993\n",
      "trajectorySteps 44\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [38  1  1  0  0]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.400e-02 -2.570e-02 -1.510e-02 -6.200e-03 -6.100e-03]\n",
      " [-3.860e-02 -8.400e-03 -9.600e-03 -1.110e-02 -5.500e-03]\n",
      " [-2.590e-02 -3.970e-02  7.830e-02 -1.060e-02 -5.300e-03]\n",
      " [-3.080e-02  1.166e-01  6.096e-01  4.980e-02 -7.400e-03]\n",
      " [-2.460e-02 -3.000e-04  1.780e-01  4.800e-03 -1.600e-03]]\n",
      "mean_state_value 0.02961757259997852\n",
      "episode 397/600\n",
      "p1 0.9184000000000003 p0 0.02039999999999993\n",
      "trajectorySteps 25\n",
      "[[11  1  1  1  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.390e-02 -2.550e-02 -1.500e-02 -6.200e-03 -6.100e-03]\n",
      " [-3.830e-02 -8.400e-03 -9.600e-03 -1.110e-02 -5.500e-03]\n",
      " [-2.590e-02 -3.940e-02  7.840e-02 -1.050e-02 -5.200e-03]\n",
      " [-3.060e-02  1.167e-01  6.121e-01  4.980e-02 -7.300e-03]\n",
      " [-2.450e-02 -3.000e-04  1.796e-01  5.000e-03 -1.600e-03]]\n",
      "mean_state_value 0.02987593431898758\n",
      "episode 398/600\n",
      "p1 0.9192000000000002 p0 0.02019999999999993\n",
      "trajectorySteps 132\n",
      "[[116   1   1   1   1]\n",
      " [  3   0   0   0   1]\n",
      " [  1   0   0   0   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.390e-02 -2.530e-02 -1.490e-02 -6.200e-03 -6.000e-03]\n",
      " [-3.810e-02 -8.300e-03 -9.500e-03 -1.100e-02 -5.500e-03]\n",
      " [-2.580e-02 -3.910e-02  7.850e-02 -1.040e-02 -5.200e-03]\n",
      " [-3.040e-02  1.169e-01  6.146e-01  4.990e-02 -7.200e-03]\n",
      " [-2.430e-02 -3.000e-04  1.811e-01  5.200e-03 -1.600e-03]]\n",
      "mean_state_value 0.030131581946234443\n",
      "episode 399/600\n",
      "p1 0.9200000000000003 p0 0.019999999999999928\n",
      "trajectorySteps 33\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [13  0  2  0  0]\n",
      " [15  1  1  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0239 -0.0252 -0.0148 -0.0062 -0.006 ]\n",
      " [-0.0378 -0.0083 -0.0094 -0.0109 -0.0054]\n",
      " [-0.0257 -0.0388  0.0786 -0.0103 -0.0052]\n",
      " [-0.0305  0.117   0.6171  0.0499 -0.0072]\n",
      " [-0.0244 -0.      0.1827  0.0053 -0.0016]]\n",
      "mean_state_value 0.030370969555453372\n",
      "episode 400/600\n",
      "p1 0.9208000000000003 p0 0.01979999999999993\n",
      "trajectorySteps 33\n",
      "[[ 1  1  1  2  1]\n",
      " [ 1  0  0  0  1]\n",
      " [10  0  0  0  1]\n",
      " [ 8  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0238 -0.025  -0.0146 -0.0061 -0.006 ]\n",
      " [-0.0375 -0.0082 -0.0093 -0.0108 -0.0054]\n",
      " [-0.0256 -0.0385  0.0787 -0.0102 -0.0051]\n",
      " [-0.0304  0.1172  0.6196  0.05   -0.0071]\n",
      " [-0.0243  0.      0.1842  0.0055 -0.0016]]\n",
      "mean_state_value 0.030626558456851652\n",
      "episode 401/600\n",
      "p1 0.9216000000000003 p0 0.01959999999999993\n",
      "trajectorySteps 124\n",
      "[[  0   0   0   0   0]\n",
      " [  0   0   0   0   0]\n",
      " [  2   1   0   0   0]\n",
      " [113   1   2   0   0]\n",
      " [  5   0   0   0   0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.370e-02 -2.480e-02 -1.450e-02 -6.100e-03 -5.900e-03]\n",
      " [-3.730e-02 -8.200e-03 -9.200e-03 -1.070e-02 -5.400e-03]\n",
      " [-2.560e-02 -3.820e-02  7.880e-02 -1.010e-02 -5.100e-03]\n",
      " [-3.070e-02  1.188e-01  6.221e-01  5.000e-02 -7.000e-03]\n",
      " [-2.420e-02  1.000e-04  1.844e-01  5.500e-03 -1.500e-03]]\n",
      "mean_state_value 0.03086209761824614\n",
      "episode 402/600\n",
      "p1 0.9224000000000003 p0 0.019399999999999928\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [12  0  0  0  0]\n",
      " [12  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.370e-02 -2.460e-02 -1.440e-02 -6.100e-03 -5.900e-03]\n",
      " [-3.700e-02 -8.100e-03 -9.100e-03 -1.060e-02 -5.400e-03]\n",
      " [-2.540e-02 -3.790e-02  7.890e-02 -1.000e-02 -5.100e-03]\n",
      " [-3.070e-02  1.196e-01  6.236e-01  5.010e-02 -7.000e-03]\n",
      " [-2.400e-02  1.000e-04  1.846e-01  5.500e-03 -1.500e-03]]\n",
      "mean_state_value 0.03103439449126306\n",
      "episode 403/600\n",
      "p1 0.9232000000000002 p0 0.01919999999999993\n",
      "trajectorySteps 22\n",
      "[[2 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [4 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.360e-02 -2.440e-02 -1.430e-02 -6.100e-03 -5.800e-03]\n",
      " [-3.670e-02 -8.100e-03 -9.000e-03 -1.050e-02 -5.300e-03]\n",
      " [-2.540e-02 -3.760e-02  7.900e-02 -9.900e-03 -5.100e-03]\n",
      " [-3.050e-02  1.197e-01  6.261e-01  5.010e-02 -6.900e-03]\n",
      " [-2.390e-02  1.000e-04  1.861e-01  5.700e-03 -1.500e-03]]\n",
      "mean_state_value 0.031292685814528265\n",
      "episode 404/600\n",
      "p1 0.9240000000000003 p0 0.018999999999999927\n",
      "trajectorySteps 119\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [99  1  2  0  0]\n",
      " [15  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.350e-02 -2.430e-02 -1.420e-02 -6.000e-03 -5.800e-03]\n",
      " [-3.640e-02 -8.000e-03 -9.000e-03 -1.040e-02 -5.300e-03]\n",
      " [-2.530e-02 -3.730e-02  7.910e-02 -9.900e-03 -5.000e-03]\n",
      " [-3.060e-02  1.213e-01  6.286e-01  5.020e-02 -6.800e-03]\n",
      " [-2.380e-02  2.000e-04  1.863e-01  5.800e-03 -1.500e-03]]\n",
      "mean_state_value 0.03153848942596746\n",
      "episode 405/600\n",
      "p1 0.9248000000000003 p0 0.018799999999999928\n",
      "trajectorySteps 101\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  1  0  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [91  1  1  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.350e-02 -2.410e-02 -1.400e-02 -6.000e-03 -5.800e-03]\n",
      " [-3.620e-02 -8.000e-03 -8.900e-03 -1.030e-02 -5.300e-03]\n",
      " [-2.520e-02 -3.700e-02  7.920e-02 -9.800e-03 -5.000e-03]\n",
      " [-3.040e-02  1.214e-01  6.311e-01  5.030e-02 -6.800e-03]\n",
      " [-2.460e-02  4.000e-04  1.879e-01  5.800e-03 -1.500e-03]]\n",
      "mean_state_value 0.031758380477229925\n",
      "episode 406/600\n",
      "p1 0.9256000000000003 p0 0.01859999999999993\n",
      "trajectorySteps 89\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [80  1  1  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.340e-02 -2.390e-02 -1.390e-02 -6.000e-03 -5.700e-03]\n",
      " [-3.590e-02 -7.900e-03 -8.800e-03 -1.020e-02 -5.300e-03]\n",
      " [-2.520e-02 -3.670e-02  7.930e-02 -9.700e-03 -5.000e-03]\n",
      " [-3.020e-02  1.216e-01  6.336e-01  5.030e-02 -6.700e-03]\n",
      " [-2.490e-02  6.000e-04  1.894e-01  5.800e-03 -1.500e-03]]\n",
      "mean_state_value 0.03200418324086035\n",
      "episode 407/600\n",
      "p1 0.9264000000000003 p0 0.018399999999999927\n",
      "trajectorySteps 44\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  1  0  0  0]\n",
      " [18  0  2  0  0]\n",
      " [19  1  1  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0233 -0.0237 -0.0138 -0.0059 -0.0057]\n",
      " [-0.0356 -0.0079 -0.0087 -0.0101 -0.0052]\n",
      " [-0.0251 -0.0364  0.0795 -0.0096 -0.005 ]\n",
      " [-0.0302  0.1217  0.6362  0.0504 -0.0066]\n",
      " [-0.0249  0.0009  0.191   0.0059 -0.0015]]\n",
      "mean_state_value 0.03224842500026512\n",
      "episode 408/600\n",
      "p1 0.9272000000000002 p0 0.018199999999999928\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0232 -0.0236 -0.0137 -0.0059 -0.0056]\n",
      " [-0.0353 -0.0078 -0.0086 -0.01   -0.0052]\n",
      " [-0.0251 -0.0361  0.0796 -0.0095 -0.0049]\n",
      " [-0.03    0.1219  0.6387  0.0504 -0.0066]\n",
      " [-0.0248  0.0009  0.1926  0.0061 -0.0014]]\n",
      "mean_state_value 0.03251108301537329\n",
      "episode 409/600\n",
      "p1 0.9280000000000003 p0 0.017999999999999926\n",
      "trajectorySteps 152\n",
      "[[ 1  1  1  1  1]\n",
      " [ 1  0  0  0  1]\n",
      " [67  4  0  0  1]\n",
      " [65  1  2  0  1]\n",
      " [ 1  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0232 -0.0234 -0.0135 -0.0059 -0.0056]\n",
      " [-0.0351 -0.0078 -0.0085 -0.0099 -0.0052]\n",
      " [-0.0248 -0.0358  0.0797 -0.0094 -0.0049]\n",
      " [-0.0304  0.122   0.6412  0.0505 -0.0065]\n",
      " [-0.0246  0.0009  0.1941  0.0063 -0.0014]]\n",
      "mean_state_value 0.03275433472425214\n",
      "episode 410/600\n",
      "p1 0.9288000000000003 p0 0.017799999999999927\n",
      "trajectorySteps 101\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [46  0  2  0  0]\n",
      " [46  1  1  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0231 -0.0232 -0.0134 -0.0059 -0.0056]\n",
      " [-0.0348 -0.0077 -0.0084 -0.0098 -0.0051]\n",
      " [-0.0248 -0.0355  0.0798 -0.0093 -0.0049]\n",
      " [-0.0303  0.1222  0.6429  0.0505 -0.0064]\n",
      " [-0.0248  0.0012  0.1957  0.0063 -0.0014]]\n",
      "mean_state_value 0.032963320493867077\n",
      "episode 411/600\n",
      "p1 0.9296000000000003 p0 0.017599999999999928\n",
      "trajectorySteps 58\n",
      "[[ 1  1  1  1  1]\n",
      " [ 1  0  0  0  2]\n",
      " [22  0  0  0  1]\n",
      " [21  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.023  -0.023  -0.0133 -0.0058 -0.0055]\n",
      " [-0.0345 -0.0077 -0.0083 -0.0097 -0.0051]\n",
      " [-0.0246 -0.0352  0.0799 -0.0092 -0.0048]\n",
      " [-0.0303  0.1223  0.6454  0.0506 -0.0064]\n",
      " [-0.0247  0.0012  0.1973  0.0065 -0.0014]]\n",
      "mean_state_value 0.0332271767633328\n",
      "episode 412/600\n",
      "p1 0.9304000000000003 p0 0.017399999999999926\n",
      "trajectorySteps 67\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [32  1  0  0  0]\n",
      " [31  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️🔄⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.023  -0.0228 -0.0132 -0.0058 -0.0055]\n",
      " [-0.0343 -0.0076 -0.0083 -0.0096 -0.0051]\n",
      " [-0.0252 -0.0349  0.08   -0.0091 -0.0048]\n",
      " [-0.0304  0.124   0.648   0.0506 -0.0063]\n",
      " [-0.0245  0.0013  0.1975  0.0066 -0.0014]]\n",
      "mean_state_value 0.03344041935252519\n",
      "episode 413/600\n",
      "p1 0.9312000000000002 p0 0.017199999999999927\n",
      "trajectorySteps 21\n",
      "[[1 1 2 1 1]\n",
      " [1 1 0 0 1]\n",
      " [3 1 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️🔄⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0229 -0.0227 -0.0131 -0.0058 -0.0054]\n",
      " [-0.034  -0.0076 -0.0082 -0.0095 -0.0051]\n",
      " [-0.0252 -0.0348  0.0801 -0.009  -0.0048]\n",
      " [-0.0302  0.1241  0.6494  0.0507 -0.0062]\n",
      " [-0.0244  0.0013  0.1982  0.0068 -0.0013]]\n",
      "mean_state_value 0.03361740645820216\n",
      "episode 414/600\n",
      "p1 0.9320000000000003 p0 0.016999999999999925\n",
      "trajectorySteps 23\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [11  0  0  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️🔄⏬🔄⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0225 -0.0129 -0.0058 -0.0054]\n",
      " [-0.0337 -0.0075 -0.0081 -0.0094 -0.005 ]\n",
      " [-0.0259 -0.0345  0.0802 -0.009  -0.0048]\n",
      " [-0.0302  0.1257  0.652   0.0507 -0.0062]\n",
      " [-0.0242  0.0013  0.1984  0.0068 -0.0013]]\n",
      "mean_state_value 0.033837349144393236\n",
      "episode 415/600\n",
      "p1 0.9328000000000003 p0 0.016799999999999926\n",
      "trajectorySteps 42\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3 31  0  0  0]\n",
      " [ 5  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️🔄⏬🔄⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0228 -0.0223 -0.0128 -0.0057 -0.0054]\n",
      " [-0.0334 -0.0075 -0.008  -0.0093 -0.005 ]\n",
      " [-0.0268 -0.0342  0.0803 -0.0089 -0.0047]\n",
      " [-0.0301  0.1273  0.6545  0.0508 -0.0061]\n",
      " [-0.0241  0.0014  0.1986  0.0069 -0.0013]]\n",
      "mean_state_value 0.0340561579419083\n",
      "episode 416/600\n",
      "p1 0.9336000000000003 p0 0.016599999999999927\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️🔄⏬🔄⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0227 -0.0221 -0.0127 -0.0057 -0.0053]\n",
      " [-0.0332 -0.0074 -0.0079 -0.0092 -0.005 ]\n",
      " [-0.0267 -0.0339  0.0804 -0.0088 -0.0047]\n",
      " [-0.0299  0.1275  0.6571  0.0508 -0.006 ]\n",
      " [-0.0239  0.0014  0.2001  0.0071 -0.0013]]\n",
      "mean_state_value 0.03432159277909337\n",
      "episode 417/600\n",
      "p1 0.9344000000000003 p0 0.016399999999999925\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️🔄⏬🔄⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0226 -0.0219 -0.0126 -0.0057 -0.0053]\n",
      " [-0.0329 -0.0074 -0.0078 -0.0091 -0.005 ]\n",
      " [-0.0266 -0.0336  0.0805 -0.0087 -0.0047]\n",
      " [-0.0297  0.1276  0.6597  0.0509 -0.006 ]\n",
      " [-0.0237  0.0015  0.2017  0.0073 -0.0013]]\n",
      "mean_state_value 0.034587336166058795\n",
      "episode 418/600\n",
      "p1 0.9352000000000003 p0 0.016199999999999926\n",
      "trajectorySteps 86\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [79  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️🔄⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0225 -0.0218 -0.0125 -0.0056 -0.0052]\n",
      " [-0.0326 -0.0073 -0.0077 -0.009  -0.0049]\n",
      " [-0.0266 -0.0333  0.0806 -0.0086 -0.0047]\n",
      " [-0.0298  0.1293  0.6622  0.051  -0.0059]\n",
      " [-0.0236  0.0015  0.2019  0.0073 -0.0013]]\n",
      "mean_state_value 0.034836033812729164\n",
      "episode 419/600\n",
      "p1 0.9360000000000003 p0 0.015999999999999924\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️🔄⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0225 -0.0216 -0.0123 -0.0056 -0.0052]\n",
      " [-0.0323 -0.0073 -0.0076 -0.0089 -0.0049]\n",
      " [-0.0265 -0.033   0.0807 -0.0085 -0.0046]\n",
      " [-0.0295  0.1294  0.6648  0.051  -0.0059]\n",
      " [-0.0234  0.0016  0.2035  0.0076 -0.0012]]\n",
      "mean_state_value 0.03510287991649835\n",
      "episode 420/600\n",
      "p1 0.9368000000000003 p0 0.015799999999999925\n",
      "trajectorySteps 29\n",
      "[[ 1  1  1  1  1]\n",
      " [ 1  0  0  2  1]\n",
      " [ 1  0  0 16  0]\n",
      " [ 0  0  2  1  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0224 -0.0214 -0.0122 -0.0056 -0.0052]\n",
      " [-0.0321 -0.0072 -0.0076 -0.0088 -0.0049]\n",
      " [-0.0265 -0.0327  0.0808 -0.0087 -0.0046]\n",
      " [-0.0293  0.1296  0.6674  0.0526 -0.0058]\n",
      " [-0.0233  0.0016  0.2037  0.0076 -0.0012]]\n",
      "mean_state_value 0.03535821341844908\n",
      "episode 421/600\n",
      "p1 0.9376000000000003 p0 0.015599999999999925\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0223 -0.0212 -0.0121 -0.0056 -0.0051]\n",
      " [-0.0318 -0.0072 -0.0075 -0.0087 -0.0049]\n",
      " [-0.0264 -0.0323  0.0809 -0.0086 -0.0046]\n",
      " [-0.0291  0.1297  0.6699  0.0527 -0.0057]\n",
      " [-0.0231  0.0016  0.2053  0.0078 -0.0012]]\n",
      "mean_state_value 0.03562576707433993\n",
      "episode 422/600\n",
      "p1 0.9384000000000003 p0 0.015399999999999924\n",
      "trajectorySteps 19\n",
      "[[1 1 2 2 1]\n",
      " [1 1 0 0 1]\n",
      " [1 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.021  -0.012  -0.0055 -0.0051]\n",
      " [-0.0315 -0.0071 -0.0074 -0.0087 -0.0048]\n",
      " [-0.0264 -0.0322  0.081  -0.0085 -0.0045]\n",
      " [-0.0289  0.1299  0.6725  0.0527 -0.0057]\n",
      " [-0.023   0.0017  0.2069  0.0081 -0.0012]]\n",
      "mean_state_value 0.035887014021497944\n",
      "episode 423/600\n",
      "p1 0.9392000000000003 p0 0.015199999999999925\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0222 -0.0208 -0.0119 -0.0055 -0.005 ]\n",
      " [-0.0313 -0.0071 -0.0073 -0.0086 -0.0048]\n",
      " [-0.0263 -0.0319  0.0811 -0.0084 -0.0045]\n",
      " [-0.0287  0.13    0.6751  0.0528 -0.0056]\n",
      " [-0.0228  0.0017  0.2085  0.0083 -0.0011]]\n",
      "mean_state_value 0.03615530257073462\n",
      "episode 424/600\n",
      "p1 0.9400000000000003 p0 0.014999999999999925\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0221 -0.0207 -0.0117 -0.0055 -0.005 ]\n",
      " [-0.031  -0.007  -0.0072 -0.0085 -0.0048]\n",
      " [-0.0263 -0.0316  0.0812 -0.0083 -0.0045]\n",
      " [-0.0285  0.1302  0.6777  0.0528 -0.0055]\n",
      " [-0.0227  0.0018  0.2102  0.0085 -0.0011]]\n",
      "mean_state_value 0.03642268972016585\n",
      "episode 425/600\n",
      "p1 0.9408000000000003 p0 0.014799999999999924\n",
      "trajectorySteps 17\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.022  -0.0205 -0.0117 -0.0055 -0.005 ]\n",
      " [-0.0307 -0.007  -0.0071 -0.0084 -0.0047]\n",
      " [-0.0262 -0.0313  0.0813 -0.0082 -0.0045]\n",
      " [-0.0282  0.1304  0.6803  0.0529 -0.0055]\n",
      " [-0.0225  0.0018  0.2118  0.0088 -0.0011]]\n",
      "mean_state_value 0.036686133310552885\n",
      "episode 426/600\n",
      "p1 0.9416000000000003 p0 0.014599999999999924\n",
      "trajectorySteps 24\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 8 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0219 -0.0203 -0.0116 -0.0054 -0.0049]\n",
      " [-0.0305 -0.007  -0.007  -0.0083 -0.0047]\n",
      " [-0.0261 -0.031   0.0814 -0.0081 -0.0044]\n",
      " [-0.028   0.1305  0.6829  0.053  -0.0054]\n",
      " [-0.0224  0.0018  0.2134  0.009  -0.0011]]\n",
      "mean_state_value 0.036955383171597786\n",
      "episode 427/600\n",
      "p1 0.9424000000000003 p0 0.014399999999999923\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0218 -0.0201 -0.0115 -0.0054 -0.0049]\n",
      " [-0.0302 -0.0069 -0.007  -0.0082 -0.0047]\n",
      " [-0.0261 -0.0307  0.0816 -0.008  -0.0044]\n",
      " [-0.0278  0.1307  0.6855  0.053  -0.0053]\n",
      " [-0.0222  0.0019  0.215   0.0092 -0.001 ]]\n",
      "mean_state_value 0.03722451005412501\n",
      "episode 428/600\n",
      "p1 0.9432000000000003 p0 0.014199999999999924\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 2 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0218 -0.0199 -0.0114 -0.0054 -0.0049]\n",
      " [-0.0299 -0.0069 -0.0069 -0.0081 -0.0047]\n",
      " [-0.026  -0.0305  0.0817 -0.0079 -0.0044]\n",
      " [-0.0276  0.1323  0.6881  0.0531 -0.0053]\n",
      " [-0.0221  0.0019  0.2152  0.0093 -0.001 ]]\n",
      "mean_state_value 0.037481209161888816\n",
      "episode 429/600\n",
      "p1 0.9440000000000003 p0 0.013999999999999924\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [1 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0217 -0.0198 -0.0113 -0.0053 -0.0048]\n",
      " [-0.0296 -0.0068 -0.0068 -0.008  -0.0046]\n",
      " [-0.0259 -0.0302  0.0818 -0.0078 -0.0043]\n",
      " [-0.0274  0.1325  0.6907  0.0531 -0.0052]\n",
      " [-0.0219  0.002   0.2168  0.0095 -0.001 ]]\n",
      "mean_state_value 0.037751165642881855\n",
      "episode 430/600\n",
      "p1 0.9448000000000003 p0 0.013799999999999923\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0216 -0.0196 -0.0111 -0.0053 -0.0048]\n",
      " [-0.0294 -0.0068 -0.0067 -0.0079 -0.0046]\n",
      " [-0.0259 -0.0299  0.0819 -0.0077 -0.0043]\n",
      " [-0.0272  0.1326  0.6933  0.0532 -0.0051]\n",
      " [-0.0218  0.002   0.2185  0.0097 -0.001 ]]\n",
      "mean_state_value 0.038021362171593555\n",
      "episode 431/600\n",
      "p1 0.9456000000000003 p0 0.013599999999999923\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0216 -0.0194 -0.011  -0.0053 -0.0047]\n",
      " [-0.0291 -0.0067 -0.0066 -0.0078 -0.0046]\n",
      " [-0.0258 -0.0296  0.082  -0.0076 -0.0043]\n",
      " [-0.027   0.1328  0.6959  0.0532 -0.0051]\n",
      " [-0.0216  0.0021  0.2201  0.01   -0.001 ]]\n",
      "mean_state_value 0.03828998754433432\n",
      "episode 432/600\n",
      "p1 0.9464000000000004 p0 0.013399999999999922\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0215 -0.0192 -0.0109 -0.0053 -0.0047]\n",
      " [-0.0288 -0.0067 -0.0065 -0.0077 -0.0046]\n",
      " [-0.0257 -0.0293  0.0821 -0.0075 -0.0043]\n",
      " [-0.0267  0.133   0.6985  0.0533 -0.005 ]\n",
      " [-0.0215  0.0021  0.2217  0.0102 -0.0009]]\n",
      "mean_state_value 0.038560657338750304\n",
      "episode 433/600\n",
      "p1 0.9472000000000003 p0 0.013199999999999924\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0214 -0.019  -0.0108 -0.0052 -0.0047]\n",
      " [-0.0286 -0.0066 -0.0064 -0.0076 -0.0045]\n",
      " [-0.0257 -0.029   0.0822 -0.0075 -0.0043]\n",
      " [-0.0265  0.1331  0.7011  0.0533 -0.0049]\n",
      " [-0.0213  0.0021  0.2234  0.0105 -0.0009]]\n",
      "mean_state_value 0.03883179283800031\n",
      "episode 434/600\n",
      "p1 0.9480000000000003 p0 0.012999999999999923\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0213 -0.0188 -0.0107 -0.0052 -0.0046]\n",
      " [-0.0283 -0.0066 -0.0063 -0.0075 -0.0045]\n",
      " [-0.0256 -0.0287  0.0823 -0.0074 -0.0042]\n",
      " [-0.0263  0.1333  0.703   0.0534 -0.0049]\n",
      " [-0.0212  0.0022  0.225   0.0107 -0.0009]]\n",
      "mean_state_value 0.039075451236266\n",
      "episode 435/600\n",
      "p1 0.9488000000000003 p0 0.012799999999999923\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0213 -0.0187 -0.0105 -0.0052 -0.0046]\n",
      " [-0.028  -0.0065 -0.0063 -0.0074 -0.0045]\n",
      " [-0.0255 -0.0284  0.0824 -0.0073 -0.0042]\n",
      " [-0.0261  0.1334  0.7056  0.0535 -0.0048]\n",
      " [-0.021   0.0022  0.2267  0.011  -0.0009]]\n",
      "mean_state_value 0.03934703704339687\n",
      "episode 436/600\n",
      "p1 0.9496000000000003 p0 0.012599999999999922\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0212 -0.0185 -0.0104 -0.0052 -0.0045]\n",
      " [-0.0278 -0.0065 -0.0062 -0.0073 -0.0044]\n",
      " [-0.0255 -0.0281  0.0825 -0.0072 -0.0042]\n",
      " [-0.0259  0.1336  0.7082  0.0535 -0.0047]\n",
      " [-0.0209  0.0023  0.2283  0.0112 -0.0008]]\n",
      "mean_state_value 0.03961905722311411\n",
      "episode 437/600\n",
      "p1 0.9504000000000004 p0 0.012399999999999922\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0211 -0.0183 -0.0103 -0.0051 -0.0045]\n",
      " [-0.0275 -0.0064 -0.0061 -0.0072 -0.0044]\n",
      " [-0.0254 -0.0278  0.0826 -0.0071 -0.0042]\n",
      " [-0.0257  0.1337  0.7109  0.0536 -0.0047]\n",
      " [-0.0207  0.0023  0.23    0.0115 -0.0008]]\n",
      "mean_state_value 0.03989140154534432\n",
      "episode 438/600\n",
      "p1 0.9512000000000003 p0 0.012199999999999923\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.021  -0.0181 -0.0102 -0.0051 -0.0045]\n",
      " [-0.0272 -0.0064 -0.006  -0.0071 -0.0044]\n",
      " [-0.0253 -0.0275  0.0827 -0.007  -0.0041]\n",
      " [-0.0254  0.1339  0.7135  0.0536 -0.0046]\n",
      " [-0.0206  0.0024  0.2316  0.0117 -0.0008]]\n",
      "mean_state_value 0.040164070173060125\n",
      "episode 439/600\n",
      "p1 0.9520000000000003 p0 0.011999999999999922\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 2 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.090e-02 -1.790e-02 -1.010e-02 -5.200e-03 -4.400e-03]\n",
      " [-2.700e-02 -6.300e-03 -5.900e-03 -7.000e-03 -4.400e-03]\n",
      " [-2.530e-02 -2.720e-02  8.280e-02 -6.900e-03 -4.100e-03]\n",
      " [-2.520e-02  1.341e-01  7.161e-01  5.370e-02 -4.500e-03]\n",
      " [-2.040e-02  2.400e-03  2.332e-01  1.170e-02 -7.000e-04]]\n",
      "mean_state_value 0.040422942455782034\n",
      "episode 440/600\n",
      "p1 0.9528000000000003 p0 0.011799999999999922\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️⬅️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.090e-02 -1.780e-02 -9.900e-03 -5.100e-03 -4.400e-03]\n",
      " [-2.670e-02 -6.300e-03 -5.800e-03 -6.900e-03 -4.300e-03]\n",
      " [-2.520e-02 -2.690e-02  8.290e-02 -6.800e-03 -4.100e-03]\n",
      " [-2.500e-02  1.342e-01  7.188e-01  5.370e-02 -4.500e-03]\n",
      " [-2.030e-02  2.400e-03  2.349e-01  1.200e-02 -7.000e-04]]\n",
      "mean_state_value 0.04069474760425685\n",
      "episode 441/600\n",
      "p1 0.9536000000000003 p0 0.011599999999999921\n",
      "trajectorySteps 39\n",
      "[[ 2 13 12  1  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.080e-02 -1.750e-02 -9.800e-03 -5.100e-03 -4.400e-03]\n",
      " [-2.640e-02 -6.200e-03 -5.700e-03 -6.800e-03 -4.300e-03]\n",
      " [-2.510e-02 -2.660e-02  8.300e-02 -6.700e-03 -4.100e-03]\n",
      " [-2.480e-02  1.344e-01  7.214e-01  5.380e-02 -4.400e-03]\n",
      " [-2.010e-02  2.500e-03  2.366e-01  1.220e-02 -7.000e-04]]\n",
      "mean_state_value 0.04097091032655631\n",
      "episode 442/600\n",
      "p1 0.9544000000000004 p0 0.01139999999999992\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.070e-02 -1.730e-02 -9.700e-03 -5.100e-03 -4.300e-03]\n",
      " [-2.610e-02 -6.200e-03 -5.600e-03 -6.700e-03 -4.300e-03]\n",
      " [-2.510e-02 -2.620e-02  8.310e-02 -6.600e-03 -4.100e-03]\n",
      " [-2.460e-02  1.345e-01  7.240e-01  5.380e-02 -4.300e-03]\n",
      " [-2.000e-02  2.500e-03  2.382e-01  1.250e-02 -7.000e-04]]\n",
      "mean_state_value 0.041243746854930236\n",
      "episode 443/600\n",
      "p1 0.9552000000000003 p0 0.011199999999999922\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.060e-02 -1.720e-02 -9.600e-03 -5.100e-03 -4.300e-03]\n",
      " [-2.590e-02 -6.100e-03 -5.600e-03 -6.600e-03 -4.300e-03]\n",
      " [-2.500e-02 -2.590e-02  8.320e-02 -6.500e-03 -4.000e-03]\n",
      " [-2.440e-02  1.347e-01  7.267e-01  5.390e-02 -4.300e-03]\n",
      " [-1.980e-02  2.600e-03  2.399e-01  1.270e-02 -7.000e-04]]\n",
      "mean_state_value 0.04151803310217133\n",
      "episode 444/600\n",
      "p1 0.9560000000000003 p0 0.010999999999999921\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.060e-02 -1.700e-02 -9.500e-03 -5.000e-03 -4.200e-03]\n",
      " [-2.560e-02 -6.100e-03 -5.500e-03 -6.500e-03 -4.200e-03]\n",
      " [-2.490e-02 -2.560e-02  8.330e-02 -6.400e-03 -4.000e-03]\n",
      " [-2.410e-02  1.348e-01  7.293e-01  5.400e-02 -4.200e-03]\n",
      " [-1.970e-02  2.600e-03  2.416e-01  1.300e-02 -6.000e-04]]\n",
      "mean_state_value 0.041792474708269536\n",
      "episode 445/600\n",
      "p1 0.9568000000000003 p0 0.01079999999999992\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.050e-02 -1.680e-02 -9.300e-03 -5.000e-03 -4.200e-03]\n",
      " [-2.530e-02 -6.000e-03 -5.400e-03 -6.400e-03 -4.200e-03]\n",
      " [-2.490e-02 -2.530e-02  8.340e-02 -6.300e-03 -4.000e-03]\n",
      " [-2.390e-02  1.350e-01  7.320e-01  5.400e-02 -4.100e-03]\n",
      " [-1.950e-02  2.600e-03  2.432e-01  1.330e-02 -6.000e-04]]\n",
      "mean_state_value 0.04206723000096486\n",
      "episode 446/600\n",
      "p1 0.9576000000000003 p0 0.01059999999999992\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.040e-02 -1.660e-02 -9.200e-03 -5.000e-03 -4.200e-03]\n",
      " [-2.510e-02 -6.000e-03 -5.300e-03 -6.300e-03 -4.200e-03]\n",
      " [-2.480e-02 -2.500e-02  8.360e-02 -6.300e-03 -3.900e-03]\n",
      " [-2.370e-02  1.351e-01  7.346e-01  5.410e-02 -4.100e-03]\n",
      " [-1.940e-02  2.700e-03  2.449e-01  1.350e-02 -6.000e-04]]\n",
      "mean_state_value 0.04234242988437299\n",
      "episode 447/600\n",
      "p1 0.9584000000000004 p0 0.01039999999999992\n",
      "trajectorySteps 19\n",
      "[[1 1 1 2 2]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.030e-02 -1.640e-02 -9.100e-03 -4.900e-03 -4.100e-03]\n",
      " [-2.480e-02 -5.900e-03 -5.200e-03 -6.300e-03 -4.100e-03]\n",
      " [-2.470e-02 -2.470e-02  8.370e-02 -6.200e-03 -3.900e-03]\n",
      " [-2.350e-02  1.353e-01  7.373e-01  5.410e-02 -4.000e-03]\n",
      " [-1.920e-02  2.700e-03  2.466e-01  1.380e-02 -5.000e-04]]\n",
      "mean_state_value 0.042617852823054386\n",
      "episode 448/600\n",
      "p1 0.9592000000000003 p0 0.010199999999999921\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.030e-02 -1.630e-02 -9.000e-03 -4.900e-03 -4.100e-03]\n",
      " [-2.450e-02 -5.900e-03 -5.100e-03 -6.200e-03 -4.100e-03]\n",
      " [-2.460e-02 -2.440e-02  8.380e-02 -6.100e-03 -3.900e-03]\n",
      " [-2.330e-02  1.355e-01  7.399e-01  5.420e-02 -3.900e-03]\n",
      " [-1.910e-02  2.800e-03  2.483e-01  1.410e-02 -5.000e-04]]\n",
      "mean_state_value 0.04289403089607498\n",
      "episode 449/600\n",
      "p1 0.9600000000000003 p0 0.00999999999999992\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.020e-02 -1.610e-02 -8.900e-03 -4.900e-03 -4.000e-03]\n",
      " [-2.430e-02 -5.800e-03 -5.000e-03 -6.100e-03 -4.100e-03]\n",
      " [-2.460e-02 -2.410e-02  8.390e-02 -6.000e-03 -3.900e-03]\n",
      " [-2.310e-02  1.356e-01  7.426e-01  5.420e-02 -3.900e-03]\n",
      " [-1.890e-02  2.800e-03  2.500e-01  1.430e-02 -5.000e-04]]\n",
      "mean_state_value 0.043170523529357324\n",
      "episode 450/600\n",
      "p1 0.9608000000000003 p0 0.00979999999999992\n",
      "trajectorySteps 18\n",
      "[[2 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.010e-02 -1.590e-02 -8.700e-03 -4.900e-03 -4.000e-03]\n",
      " [-2.400e-02 -5.800e-03 -4.900e-03 -6.000e-03 -4.100e-03]\n",
      " [-2.450e-02 -2.380e-02  8.400e-02 -5.900e-03 -3.800e-03]\n",
      " [-2.280e-02  1.358e-01  7.453e-01  5.430e-02 -3.800e-03]\n",
      " [-1.880e-02  2.900e-03  2.516e-01  1.460e-02 -5.000e-04]]\n",
      "mean_state_value 0.04344682581469793\n",
      "episode 451/600\n",
      "p1 0.9616000000000003 p0 0.00959999999999992\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.000e-02 -1.570e-02 -8.600e-03 -4.800e-03 -4.000e-03]\n",
      " [-2.370e-02 -5.700e-03 -4.900e-03 -5.900e-03 -4.000e-03]\n",
      " [-2.440e-02 -2.350e-02  8.410e-02 -5.800e-03 -3.800e-03]\n",
      " [-2.260e-02  1.359e-01  7.479e-01  5.430e-02 -3.700e-03]\n",
      " [-1.860e-02  2.900e-03  2.533e-01  1.490e-02 -4.000e-04]]\n",
      "mean_state_value 0.043723947559028524\n",
      "episode 452/600\n",
      "p1 0.9624000000000004 p0 0.009399999999999919\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.990e-02 -1.550e-02 -8.500e-03 -4.800e-03 -3.900e-03]\n",
      " [-2.340e-02 -5.700e-03 -4.800e-03 -5.800e-03 -4.000e-03]\n",
      " [-2.440e-02 -2.320e-02  8.420e-02 -5.700e-03 -3.800e-03]\n",
      " [-2.240e-02  1.361e-01  7.506e-01  5.440e-02 -3.700e-03]\n",
      " [-1.850e-02  2.900e-03  2.550e-01  1.520e-02 -4.000e-04]]\n",
      "mean_state_value 0.04400111783937923\n",
      "episode 453/600\n",
      "p1 0.9632000000000003 p0 0.00919999999999992\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.990e-02 -1.530e-02 -8.400e-03 -4.800e-03 -3.900e-03]\n",
      " [-2.320e-02 -5.600e-03 -4.700e-03 -5.700e-03 -4.000e-03]\n",
      " [-2.430e-02 -2.290e-02  8.430e-02 -5.600e-03 -3.700e-03]\n",
      " [-2.220e-02  1.362e-01  7.533e-01  5.450e-02 -3.600e-03]\n",
      " [-1.830e-02  3.000e-03  2.567e-01  1.540e-02 -4.000e-04]]\n",
      "mean_state_value 0.04427886531405055\n",
      "episode 454/600\n",
      "p1 0.9640000000000003 p0 0.00899999999999992\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.980e-02 -1.520e-02 -8.300e-03 -4.700e-03 -3.800e-03]\n",
      " [-2.290e-02 -5.600e-03 -4.600e-03 -5.600e-03 -4.000e-03]\n",
      " [-2.420e-02 -2.260e-02  8.440e-02 -5.500e-03 -3.700e-03]\n",
      " [-2.200e-02  1.364e-01  7.560e-01  5.450e-02 -3.500e-03]\n",
      " [-1.820e-02  3.000e-03  2.584e-01  1.550e-02 -3.000e-04]]\n",
      "mean_state_value 0.0445467933623804\n",
      "episode 455/600\n",
      "p1 0.9648000000000003 p0 0.008799999999999919\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.970e-02 -1.500e-02 -8.100e-03 -4.700e-03 -3.800e-03]\n",
      " [-2.260e-02 -5.500e-03 -4.500e-03 -5.500e-03 -3.900e-03]\n",
      " [-2.420e-02 -2.230e-02  8.450e-02 -5.400e-03 -3.700e-03]\n",
      " [-2.180e-02  1.366e-01  7.586e-01  5.460e-02 -3.500e-03]\n",
      " [-1.800e-02  3.100e-03  2.601e-01  1.570e-02 -3.000e-04]]\n",
      "mean_state_value 0.04482517216268991\n",
      "episode 456/600\n",
      "p1 0.9656000000000003 p0 0.008599999999999918\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.960e-02 -1.480e-02 -8.000e-03 -4.700e-03 -3.800e-03]\n",
      " [-2.240e-02 -5.500e-03 -4.400e-03 -5.400e-03 -3.900e-03]\n",
      " [-2.410e-02 -2.200e-02  8.460e-02 -5.300e-03 -3.700e-03]\n",
      " [-2.150e-02  1.367e-01  7.613e-01  5.460e-02 -3.400e-03]\n",
      " [-1.790e-02  3.100e-03  2.618e-01  1.600e-02 -3.000e-04]]\n",
      "mean_state_value 0.04510386677128985\n",
      "episode 457/600\n",
      "p1 0.9664000000000004 p0 0.008399999999999918\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 2]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.960e-02 -1.460e-02 -7.900e-03 -4.700e-03 -3.900e-03]\n",
      " [-2.210e-02 -5.400e-03 -4.300e-03 -5.300e-03 -3.900e-03]\n",
      " [-2.400e-02 -2.170e-02  8.470e-02 -5.200e-03 -3.600e-03]\n",
      " [-2.130e-02  1.369e-01  7.630e-01  5.470e-02 -3.400e-03]\n",
      " [-1.770e-02  3.200e-03  2.628e-01  1.630e-02 -3.000e-04]]\n",
      "mean_state_value 0.045306291313827254\n",
      "episode 458/600\n",
      "p1 0.9672000000000003 p0 0.00819999999999992\n",
      "trajectorySteps 17\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.950e-02 -1.440e-02 -7.800e-03 -4.600e-03 -3.800e-03]\n",
      " [-2.180e-02 -5.400e-03 -4.300e-03 -5.200e-03 -3.900e-03]\n",
      " [-2.400e-02 -2.140e-02  8.480e-02 -5.100e-03 -3.600e-03]\n",
      " [-2.110e-02  1.370e-01  7.657e-01  5.470e-02 -3.300e-03]\n",
      " [-1.760e-02  3.200e-03  2.645e-01  1.660e-02 -2.000e-04]]\n",
      "mean_state_value 0.04558551702935662\n",
      "episode 459/600\n",
      "p1 0.9680000000000003 p0 0.007999999999999919\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.940e-02 -1.420e-02 -7.700e-03 -4.600e-03 -3.800e-03]\n",
      " [-2.160e-02 -5.400e-03 -4.200e-03 -5.100e-03 -3.800e-03]\n",
      " [-2.390e-02 -2.110e-02  8.490e-02 -5.100e-03 -3.600e-03]\n",
      " [-2.090e-02  1.372e-01  7.684e-01  5.480e-02 -3.200e-03]\n",
      " [-1.740e-02  3.200e-03  2.662e-01  1.690e-02 -2.000e-04]]\n",
      "mean_state_value 0.04586459112592902\n",
      "episode 460/600\n",
      "p1 0.9688000000000003 p0 0.007799999999999918\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.930e-02 -1.410e-02 -7.500e-03 -4.600e-03 -3.700e-03]\n",
      " [-2.130e-02 -5.300e-03 -4.100e-03 -5.000e-03 -3.800e-03]\n",
      " [-2.380e-02 -2.080e-02  8.500e-02 -5.000e-03 -3.500e-03]\n",
      " [-2.070e-02  1.373e-01  7.711e-01  5.480e-02 -3.200e-03]\n",
      " [-1.720e-02  3.300e-03  2.679e-01  1.720e-02 -2.000e-04]]\n",
      "mean_state_value 0.04614446260775314\n",
      "episode 461/600\n",
      "p1 0.9696000000000004 p0 0.007599999999999918\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [3 0 0 0 1]\n",
      " [2 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.930e-02 -1.390e-02 -7.400e-03 -4.500e-03 -3.700e-03]\n",
      " [-2.100e-02 -5.300e-03 -4.000e-03 -4.900e-03 -3.800e-03]\n",
      " [-2.370e-02 -2.050e-02  8.510e-02 -4.900e-03 -3.500e-03]\n",
      " [-2.050e-02  1.375e-01  7.738e-01  5.490e-02 -3.100e-03]\n",
      " [-1.710e-02  3.300e-03  2.696e-01  1.750e-02 -1.000e-04]]\n",
      "mean_state_value 0.04642393378381944\n",
      "episode 462/600\n",
      "p1 0.9704000000000004 p0 0.007399999999999918\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 1 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.920e-02 -1.370e-02 -7.300e-03 -4.500e-03 -3.700e-03]\n",
      " [-2.080e-02 -5.200e-03 -3.900e-03 -4.800e-03 -3.700e-03]\n",
      " [-2.370e-02 -2.020e-02  8.520e-02 -4.800e-03 -3.500e-03]\n",
      " [-2.030e-02  1.377e-01  7.765e-01  5.500e-02 -3.000e-03]\n",
      " [-1.690e-02  3.400e-03  2.713e-01  1.770e-02 -1.000e-04]]\n",
      "mean_state_value 0.046704177021107365\n",
      "episode 463/600\n",
      "p1 0.9712000000000003 p0 0.007199999999999917\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.910e-02 -1.350e-02 -7.200e-03 -4.500e-03 -3.600e-03]\n",
      " [-2.050e-02 -5.200e-03 -3.800e-03 -4.700e-03 -3.700e-03]\n",
      " [-2.360e-02 -1.990e-02  8.530e-02 -4.700e-03 -3.500e-03]\n",
      " [-2.000e-02  1.378e-01  7.792e-01  5.500e-02 -3.000e-03]\n",
      " [-1.680e-02  3.400e-03  2.730e-01  1.770e-02 -1.000e-04]]\n",
      "mean_state_value 0.046973352683214654\n",
      "episode 464/600\n",
      "p1 0.9720000000000003 p0 0.006999999999999918\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.019  -0.0133 -0.0071 -0.0045 -0.0036]\n",
      " [-0.0202 -0.0051 -0.0037 -0.0046 -0.0037]\n",
      " [-0.0235 -0.0195  0.0854 -0.0046 -0.0034]\n",
      " [-0.0198  0.138   0.7819  0.0551 -0.0029]\n",
      " [-0.0166  0.0035  0.2748  0.018  -0.    ]]\n",
      "mean_state_value 0.04725449269951199\n",
      "episode 465/600\n",
      "p1 0.9728000000000003 p0 0.006799999999999917\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.019  -0.0132 -0.0069 -0.0044 -0.0035]\n",
      " [-0.0199 -0.0051 -0.0036 -0.0045 -0.0037]\n",
      " [-0.0235 -0.0192  0.0856 -0.0045 -0.0034]\n",
      " [-0.0196  0.1381  0.7847  0.0551 -0.0028]\n",
      " [-0.0165  0.0035  0.2765  0.0183  0.    ]]\n",
      "mean_state_value 0.047535950469436206\n",
      "episode 466/600\n",
      "p1 0.9736000000000004 p0 0.006599999999999917\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0189 -0.013  -0.0068 -0.0044 -0.0035]\n",
      " [-0.0197 -0.005  -0.0036 -0.0044 -0.0036]\n",
      " [-0.0234 -0.0189  0.0857 -0.0044 -0.0034]\n",
      " [-0.0194  0.1383  0.7874  0.0552 -0.0028]\n",
      " [-0.0163  0.0035  0.2782  0.0186  0.    ]]\n",
      "mean_state_value 0.047817244232337665\n",
      "episode 467/600\n",
      "p1 0.9744000000000004 p0 0.006399999999999917\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.880e-02 -1.280e-02 -6.700e-03 -4.400e-03 -3.500e-03]\n",
      " [-1.940e-02 -5.000e-03 -3.500e-03 -4.300e-03 -3.600e-03]\n",
      " [-2.330e-02 -1.860e-02  8.580e-02 -4.300e-03 -3.300e-03]\n",
      " [-1.920e-02  1.384e-01  7.901e-01  5.520e-02 -2.700e-03]\n",
      " [-1.620e-02  3.600e-03  2.800e-01  1.890e-02  1.000e-04]]\n",
      "mean_state_value 0.04809898816611519\n",
      "episode 468/600\n",
      "p1 0.9752000000000003 p0 0.0061999999999999165\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.870e-02 -1.260e-02 -6.600e-03 -4.300e-03 -3.400e-03]\n",
      " [-1.910e-02 -4.900e-03 -3.400e-03 -4.200e-03 -3.600e-03]\n",
      " [-2.330e-02 -1.830e-02  8.590e-02 -4.200e-03 -3.300e-03]\n",
      " [-1.900e-02  1.386e-01  7.928e-01  5.530e-02 -2.600e-03]\n",
      " [-1.600e-02  3.600e-03  2.817e-01  1.920e-02  1.000e-04]]\n",
      "mean_state_value 0.048381222504650656\n",
      "episode 469/600\n",
      "p1 0.9760000000000003 p0 0.005999999999999917\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.860e-02 -1.240e-02 -6.400e-03 -4.300e-03 -3.400e-03]\n",
      " [-1.890e-02 -4.900e-03 -3.300e-03 -4.100e-03 -3.600e-03]\n",
      " [-2.320e-02 -1.800e-02  8.600e-02 -4.100e-03 -3.300e-03]\n",
      " [-1.870e-02  1.388e-01  7.955e-01  5.530e-02 -2.600e-03]\n",
      " [-1.590e-02  3.700e-03  2.834e-01  1.950e-02  1.000e-04]]\n",
      "mean_state_value 0.048663956039202594\n",
      "episode 470/600\n",
      "p1 0.9768000000000003 p0 0.005799999999999916\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.860e-02 -1.220e-02 -6.300e-03 -4.300e-03 -3.300e-03]\n",
      " [-1.860e-02 -4.800e-03 -3.200e-03 -4.000e-03 -3.500e-03]\n",
      " [-2.310e-02 -1.770e-02  8.610e-02 -4.000e-03 -3.300e-03]\n",
      " [-1.850e-02  1.389e-01  7.983e-01  5.540e-02 -2.500e-03]\n",
      " [-1.570e-02  3.700e-03  2.852e-01  1.980e-02  2.000e-04]]\n",
      "mean_state_value 0.04894674052006163\n",
      "episode 471/600\n",
      "p1 0.9776000000000004 p0 0.005599999999999916\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.850e-02 -1.210e-02 -6.200e-03 -4.300e-03 -3.300e-03]\n",
      " [-1.830e-02 -4.800e-03 -3.100e-03 -3.900e-03 -3.500e-03]\n",
      " [-2.310e-02 -1.740e-02  8.620e-02 -4.000e-03 -3.200e-03]\n",
      " [-1.830e-02  1.391e-01  8.010e-01  5.550e-02 -2.400e-03]\n",
      " [-1.560e-02  3.700e-03  2.869e-01  2.010e-02  2.000e-04]]\n",
      "mean_state_value 0.049229009845424845\n",
      "episode 472/600\n",
      "p1 0.9784000000000004 p0 0.005399999999999916\n",
      "trajectorySteps 99\n",
      "[[ 1  1  1  1 42]\n",
      " [ 1  0  0  0 42]\n",
      " [ 2  0  0  0  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.840e-02 -1.190e-02 -6.100e-03 -4.200e-03 -3.300e-03]\n",
      " [-1.810e-02 -4.700e-03 -3.000e-03 -3.900e-03 -3.500e-03]\n",
      " [-2.300e-02 -1.710e-02  8.630e-02 -3.900e-03 -3.200e-03]\n",
      " [-1.810e-02  1.392e-01  8.037e-01  5.550e-02 -2.400e-03]\n",
      " [-1.540e-02  3.800e-03  2.887e-01  2.040e-02  2.000e-04]]\n",
      "mean_state_value 0.04951243360845017\n",
      "episode 473/600\n",
      "p1 0.9792000000000003 p0 0.005199999999999916\n",
      "trajectorySteps 257\n",
      "[[  1   1   1   1 122]\n",
      " [  1   0   0   0 121]\n",
      " [  2   0   0   0   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.830e-02 -1.170e-02 -6.000e-03 -4.200e-03 -3.300e-03]\n",
      " [-1.780e-02 -4.700e-03 -2.900e-03 -3.800e-03 -3.500e-03]\n",
      " [-2.290e-02 -1.680e-02  8.640e-02 -3.800e-03 -3.200e-03]\n",
      " [-1.790e-02  1.394e-01  8.065e-01  5.560e-02 -2.300e-03]\n",
      " [-1.530e-02  3.800e-03  2.904e-01  2.080e-02  3.000e-04]]\n",
      "mean_state_value 0.04979252244506009\n",
      "episode 474/600\n",
      "p1 0.9800000000000003 p0 0.004999999999999916\n",
      "trajectorySteps 40\n",
      "[[ 1  1  1  1 13]\n",
      " [ 1  0  0  0 13]\n",
      " [ 2  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬆️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.830e-02 -1.150e-02 -5.800e-03 -4.200e-03 -3.300e-03]\n",
      " [-1.750e-02 -4.600e-03 -2.900e-03 -3.700e-03 -3.400e-03]\n",
      " [-2.290e-02 -1.650e-02  8.650e-02 -3.700e-03 -3.200e-03]\n",
      " [-1.770e-02  1.395e-01  8.092e-01  5.560e-02 -2.200e-03]\n",
      " [-1.510e-02  3.900e-03  2.922e-01  2.110e-02  3.000e-04]]\n",
      "mean_state_value 0.05007674450556216\n",
      "episode 475/600\n",
      "p1 0.9808000000000003 p0 0.0047999999999999154\n",
      "trajectorySteps 558\n",
      "[[  1   1   1   4 274]\n",
      " [  1   0   0   0 267]\n",
      " [  2   0   0   0   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️➡️🔄\n",
      "⬆️⏬⏩️⬇️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.820e-02 -1.130e-02 -5.700e-03 -4.100e-03 -3.300e-03]\n",
      " [-1.720e-02 -4.600e-03 -2.800e-03 -3.600e-03 -3.500e-03]\n",
      " [-2.280e-02 -1.620e-02  8.660e-02 -3.600e-03 -3.100e-03]\n",
      " [-1.750e-02  1.397e-01  8.109e-01  5.570e-02 -2.200e-03]\n",
      " [-1.500e-02  3.900e-03  2.930e-01  2.140e-02  4.000e-04]]\n",
      "mean_state_value 0.05027388002127893\n",
      "episode 476/600\n",
      "p1 0.9816000000000004 p0 0.004599999999999915\n",
      "trajectorySteps 500\n",
      "[[  1   1   1   1 233]\n",
      " [  1   0   0   1 251]\n",
      " [  2   0   0   1   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.810e-02 -1.120e-02 -5.600e-03 -4.100e-03 -3.300e-03]\n",
      " [-1.700e-02 -4.500e-03 -2.700e-03 -3.500e-03 -3.500e-03]\n",
      " [-2.270e-02 -1.590e-02  8.670e-02 -3.500e-03 -3.100e-03]\n",
      " [-1.720e-02  1.399e-01  8.137e-01  5.570e-02 -2.100e-03]\n",
      " [-1.480e-02  4.000e-03  2.947e-01  2.170e-02  4.000e-04]]\n",
      "mean_state_value 0.050556851249393846\n",
      "episode 477/600\n",
      "p1 0.9824000000000004 p0 0.004399999999999915\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.800e-02 -1.100e-02 -5.500e-03 -4.100e-03 -3.300e-03]\n",
      " [-1.670e-02 -4.500e-03 -2.600e-03 -3.400e-03 -3.500e-03]\n",
      " [-2.260e-02 -1.560e-02  8.680e-02 -3.400e-03 -3.100e-03]\n",
      " [-1.700e-02  1.400e-01  8.164e-01  5.580e-02 -2.000e-03]\n",
      " [-1.470e-02  4.000e-03  2.965e-01  2.200e-02  4.000e-04]]\n",
      "mean_state_value 0.05084219187703666\n",
      "episode 478/600\n",
      "p1 0.9832000000000003 p0 0.004199999999999915\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.800e-02 -1.080e-02 -5.400e-03 -4.100e-03 -3.200e-03]\n",
      " [-1.640e-02 -4.400e-03 -2.500e-03 -3.300e-03 -3.400e-03]\n",
      " [-2.260e-02 -1.520e-02  8.690e-02 -3.300e-03 -3.000e-03]\n",
      " [-1.680e-02  1.402e-01  8.192e-01  5.590e-02 -2.000e-03]\n",
      " [-1.450e-02  4.000e-03  2.982e-01  2.230e-02  5.000e-04]]\n",
      "mean_state_value 0.051127504962509535\n",
      "episode 479/600\n",
      "p1 0.9840000000000003 p0 0.003999999999999915\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.790e-02 -1.060e-02 -5.200e-03 -4.000e-03 -3.200e-03]\n",
      " [-1.620e-02 -4.400e-03 -2.400e-03 -3.200e-03 -3.400e-03]\n",
      " [-2.250e-02 -1.490e-02  8.700e-02 -3.200e-03 -3.000e-03]\n",
      " [-1.660e-02  1.403e-01  8.219e-01  5.590e-02 -1.900e-03]\n",
      " [-1.440e-02  4.100e-03  3.000e-01  2.260e-02  5.000e-04]]\n",
      "mean_state_value 0.051413372010507906\n",
      "episode 480/600\n",
      "p1 0.9848000000000003 p0 0.0037999999999999146\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.780e-02 -1.040e-02 -5.100e-03 -4.000e-03 -3.200e-03]\n",
      " [-1.590e-02 -4.300e-03 -2.300e-03 -3.100e-03 -3.400e-03]\n",
      " [-2.240e-02 -1.460e-02  8.710e-02 -3.100e-03 -3.000e-03]\n",
      " [-1.640e-02  1.405e-01  8.247e-01  5.600e-02 -1.800e-03]\n",
      " [-1.420e-02  4.100e-03  3.018e-01  2.290e-02  5.000e-04]]\n",
      "mean_state_value 0.05169873963274366\n",
      "episode 481/600\n",
      "p1 0.9856000000000004 p0 0.0035999999999999145\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.770e-02 -1.020e-02 -5.000e-03 -4.000e-03 -3.100e-03]\n",
      " [-1.560e-02 -4.300e-03 -2.300e-03 -3.000e-03 -3.400e-03]\n",
      " [-2.240e-02 -1.430e-02  8.720e-02 -3.000e-03 -3.000e-03]\n",
      " [-1.620e-02  1.406e-01  8.274e-01  5.600e-02 -1.800e-03]\n",
      " [-1.410e-02  4.200e-03  3.035e-01  2.330e-02  6.000e-04]]\n",
      "mean_state_value 0.05198501766618073\n",
      "episode 482/600\n",
      "p1 0.9864000000000004 p0 0.0033999999999999144\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.770e-02 -1.010e-02 -4.900e-03 -4.000e-03 -3.100e-03]\n",
      " [-1.540e-02 -4.200e-03 -2.200e-03 -2.900e-03 -3.300e-03]\n",
      " [-2.230e-02 -1.400e-02  8.730e-02 -2.900e-03 -2.900e-03]\n",
      " [-1.590e-02  1.408e-01  8.302e-01  5.610e-02 -1.700e-03]\n",
      " [-1.390e-02  4.200e-03  3.053e-01  2.360e-02  6.000e-04]]\n",
      "mean_state_value 0.05227186665842118\n",
      "episode 483/600\n",
      "p1 0.9872000000000003 p0 0.003199999999999914\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.760e-02 -9.900e-03 -4.800e-03 -3.900e-03 -3.000e-03]\n",
      " [-1.510e-02 -4.200e-03 -2.100e-03 -2.800e-03 -3.300e-03]\n",
      " [-2.220e-02 -1.370e-02  8.740e-02 -2.800e-03 -2.900e-03]\n",
      " [-1.570e-02  1.410e-01  8.330e-01  5.610e-02 -1.600e-03]\n",
      " [-1.380e-02  4.300e-03  3.071e-01  2.390e-02  7.000e-04]]\n",
      "mean_state_value 0.05255865504397403\n",
      "episode 484/600\n",
      "p1 0.9880000000000003 p0 0.0029999999999999138\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.750e-02 -9.700e-03 -4.600e-03 -3.900e-03 -3.000e-03]\n",
      " [-1.480e-02 -4.100e-03 -2.000e-03 -2.700e-03 -3.300e-03]\n",
      " [-2.220e-02 -1.340e-02  8.760e-02 -2.800e-03 -2.900e-03]\n",
      " [-1.550e-02  1.411e-01  8.358e-01  5.620e-02 -1.600e-03]\n",
      " [-1.360e-02  4.300e-03  3.089e-01  2.420e-02  7.000e-04]]\n",
      "mean_state_value 0.052846192967864\n",
      "episode 485/600\n",
      "p1 0.9888000000000003 p0 0.0027999999999999137\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.740e-02 -9.500e-03 -4.500e-03 -3.900e-03 -3.000e-03]\n",
      " [-1.450e-02 -4.100e-03 -1.900e-03 -2.600e-03 -3.200e-03]\n",
      " [-2.210e-02 -1.310e-02  8.770e-02 -2.700e-03 -2.800e-03]\n",
      " [-1.530e-02  1.413e-01  8.385e-01  5.620e-02 -1.500e-03]\n",
      " [-1.350e-02  4.300e-03  3.107e-01  2.460e-02  7.000e-04]]\n",
      "mean_state_value 0.05313362277477272\n",
      "episode 486/600\n",
      "p1 0.9896000000000004 p0 0.0025999999999999136\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.730e-02 -9.300e-03 -4.400e-03 -3.800e-03 -2.900e-03]\n",
      " [-1.430e-02 -4.000e-03 -1.800e-03 -2.500e-03 -3.200e-03]\n",
      " [-2.200e-02 -1.280e-02  8.780e-02 -2.600e-03 -2.800e-03]\n",
      " [-1.510e-02  1.414e-01  8.413e-01  5.630e-02 -1.400e-03]\n",
      " [-1.330e-02  4.400e-03  3.125e-01  2.490e-02  8.000e-04]]\n",
      "mean_state_value 0.053421373580678375\n",
      "episode 487/600\n",
      "p1 0.9904000000000004 p0 0.0023999999999999135\n",
      "trajectorySteps 620\n",
      "[[  0   0   0   0   0]\n",
      " [  0   0   0   0   0]\n",
      " [  1   0   0   0   0]\n",
      " [308   0   2   0   0]\n",
      " [307   1   1   0   0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.730e-02 -9.200e-03 -4.300e-03 -3.800e-03 -2.900e-03]\n",
      " [-1.400e-02 -4.000e-03 -1.700e-03 -2.400e-03 -3.200e-03]\n",
      " [-2.200e-02 -1.250e-02  8.790e-02 -2.500e-03 -2.800e-03]\n",
      " [-1.490e-02  1.416e-01  8.441e-01  5.640e-02 -1.300e-03]\n",
      " [-1.360e-02  4.700e-03  3.142e-01  2.490e-02  8.000e-04]]\n",
      "mean_state_value 0.05369164579136038\n",
      "episode 488/600\n",
      "p1 0.9912000000000003 p0 0.002199999999999913\n",
      "trajectorySteps 16\n",
      "[[1 1 1 0 0]\n",
      " [1 0 1 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.720e-02 -9.500e-03 -4.200e-03 -3.800e-03 -2.800e-03]\n",
      " [-1.370e-02 -3.900e-03 -1.600e-03 -2.300e-03 -3.200e-03]\n",
      " [-2.190e-02 -1.220e-02  8.800e-02 -2.400e-03 -2.800e-03]\n",
      " [-1.470e-02  1.417e-01  8.469e-01  5.640e-02 -1.300e-03]\n",
      " [-1.340e-02  4.800e-03  3.160e-01  2.530e-02  8.000e-04]]\n",
      "mean_state_value 0.05395793620695111\n",
      "episode 489/600\n",
      "p1 0.9920000000000003 p0 0.001999999999999913\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0171 -0.0093 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0218 -0.0119  0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8497  0.0565 -0.0012]\n",
      " [-0.0133  0.0048  0.3178  0.0256  0.0009]]\n",
      "mean_state_value 0.05424726145984638\n",
      "episode 490/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0171 -0.0093 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0218 -0.0119  0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8516  0.0565 -0.0012]\n",
      " [-0.0133  0.0048  0.3194  0.0259  0.0009]]\n",
      "mean_state_value 0.05440019457883509\n",
      "episode 491/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 2 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0171 -0.0093 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0218 -0.0119  0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8536  0.0565 -0.0012]\n",
      " [-0.0133  0.0048  0.3209  0.0262  0.0009]]\n",
      "mean_state_value 0.054553187376389686\n",
      "episode 492/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.710e-02 -9.300e-03 -4.100e-03 -3.800e-03 -2.800e-03]\n",
      " [-1.350e-02 -3.900e-03 -1.600e-03 -2.200e-03 -3.100e-03]\n",
      " [-2.180e-02 -1.190e-02  8.810e-02 -2.300e-03 -2.700e-03]\n",
      " [-1.440e-02  1.419e-01  8.556e-01  5.820e-02 -1.200e-03]\n",
      " [-1.330e-02  4.800e-03  3.209e-01  2.620e-02  8.000e-04]]\n",
      "mean_state_value 0.05469812041550606\n",
      "episode 493/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.710e-02 -9.300e-03 -4.100e-03 -3.800e-03 -2.800e-03]\n",
      " [-1.350e-02 -3.900e-03 -1.600e-03 -2.200e-03 -3.100e-03]\n",
      " [-2.180e-02 -1.190e-02  8.810e-02 -2.300e-03 -2.700e-03]\n",
      " [-1.440e-02  1.419e-01  8.575e-01  5.820e-02 -1.200e-03]\n",
      " [-1.330e-02  4.800e-03  3.224e-01  2.650e-02  8.000e-04]]\n",
      "mean_state_value 0.0548508003212172\n",
      "episode 494/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.710e-02 -9.300e-03 -4.100e-03 -3.800e-03 -2.800e-03]\n",
      " [-1.350e-02 -3.900e-03 -1.600e-03 -2.200e-03 -3.100e-03]\n",
      " [-2.180e-02 -1.190e-02  8.810e-02 -2.300e-03 -2.700e-03]\n",
      " [-1.440e-02  1.419e-01  8.595e-01  5.820e-02 -1.200e-03]\n",
      " [-1.330e-02  4.800e-03  3.239e-01  2.670e-02  8.000e-04]]\n",
      "mean_state_value 0.05500397903515504\n",
      "episode 495/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 2 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0171 -0.0093 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0218 -0.0119  0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8615  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3254  0.027   0.0009]]\n",
      "mean_state_value 0.05515718272265707\n",
      "episode 496/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0171 -0.0093 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.0119  0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8634  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.327   0.0273  0.0009]]\n",
      "mean_state_value 0.055310090031730354\n",
      "episode 497/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0171 -0.0093 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.0119  0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8654  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3285  0.0276  0.0009]]\n",
      "mean_state_value 0.05546307345441913\n",
      "episode 498/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0093 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.0119  0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8674  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.33    0.0279  0.001 ]]\n",
      "mean_state_value 0.05561655313027659\n",
      "episode 499/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0093 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8693  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3315  0.0282  0.001 ]]\n",
      "mean_state_value 0.05576968829540066\n",
      "episode 500/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0093 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8713  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3331  0.0285  0.001 ]]\n",
      "mean_state_value 0.055923033326842446\n",
      "episode 501/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 2]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0093 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0144  0.1419  0.8733  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3346  0.0288  0.001 ]]\n",
      "mean_state_value 0.056076393272278445\n",
      "episode 502/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0093 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8752  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3361  0.0291  0.0011]]\n",
      "mean_state_value 0.0562298898959753\n",
      "episode 503/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0093 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8772  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3376  0.0294  0.0011]]\n",
      "mean_state_value 0.0563837457204082\n",
      "episode 504/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0093 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8792  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3392  0.0297  0.0011]]\n",
      "mean_state_value 0.05653653100719861\n",
      "episode 505/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0093 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0135 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0217 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8811  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3407  0.03    0.0011]]\n",
      "mean_state_value 0.05669053732523405\n",
      "episode 506/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0093 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0216 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8831  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3422  0.0303  0.0012]]\n",
      "mean_state_value 0.05684467493206026\n",
      "episode 507/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0092 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0216 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8851  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3437  0.0307  0.0012]]\n",
      "mean_state_value 0.056998887606467596\n",
      "episode 508/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0092 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0216 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.887   0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3453  0.031   0.0012]]\n",
      "mean_state_value 0.05715317562861223\n",
      "episode 509/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.017  -0.0092 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0216 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.889   0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3468  0.0313  0.0013]]\n",
      "mean_state_value 0.05730753885581751\n",
      "episode 510/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0092 -0.0041 -0.0037 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0216 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.891   0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3483  0.0316  0.0013]]\n",
      "mean_state_value 0.05746207635988537\n",
      "episode 511/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 1610\n",
      "[[  1   1   1   1   2]\n",
      " [794   1   0   0   1]\n",
      " [796   5   0   0   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0092 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0188 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8929  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3498  0.0319  0.0013]]\n",
      "mean_state_value 0.057722338602686475\n",
      "episode 512/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0092 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0188 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8949  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3514  0.0322  0.0014]]\n",
      "mean_state_value 0.057876458351402246\n",
      "episode 513/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0092 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8969  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3529  0.0325  0.0014]]\n",
      "mean_state_value 0.05803065405618212\n",
      "episode 514/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0092 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.8988  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3544  0.0329  0.0014]]\n",
      "mean_state_value 0.058185280088924166\n",
      "episode 515/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️⬅️➡️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0092 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.012   0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9008  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.356   0.0332  0.0014]]\n",
      "mean_state_value 0.05833963880803874\n",
      "episode 516/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 1496\n",
      "[[  2 739 738   0   0]\n",
      " [  2   1   1   1   0]\n",
      " [  3   1   0   1   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0083 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9028  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3575  0.0335  0.0015]]\n",
      "mean_state_value 0.05853021670329903\n",
      "episode 517/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0083 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9047  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.359   0.0338  0.0015]]\n",
      "mean_state_value 0.058685012499274374\n",
      "episode 518/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0083 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0136 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9067  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3606  0.0341  0.0015]]\n",
      "mean_state_value 0.05883958685219153\n",
      "episode 519/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0083 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9086  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3621  0.0345  0.0016]]\n",
      "mean_state_value 0.05899453330785394\n",
      "episode 520/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0083 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9106  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3636  0.0348  0.0016]]\n",
      "mean_state_value 0.059149610486833114\n",
      "episode 521/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0083 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9126  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3652  0.0351  0.0016]]\n",
      "mean_state_value 0.059304542325852506\n",
      "episode 522/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0083 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9145  0.0582 -0.0012]\n",
      " [-0.0133  0.0048  0.3667  0.0354  0.0017]]\n",
      "mean_state_value 0.05945860610261377\n",
      "episode 523/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0083 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9165  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3682  0.0358  0.0017]]\n",
      "mean_state_value 0.05961385461287772\n",
      "episode 524/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0083 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9185  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3698  0.0361  0.0017]]\n",
      "mean_state_value 0.05976912870479133\n",
      "episode 525/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9204  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3713  0.0364  0.0018]]\n",
      "mean_state_value 0.05992458354004757\n",
      "episode 526/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9224  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3728  0.0368  0.0018]]\n",
      "mean_state_value 0.06008011328613343\n",
      "episode 527/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9244  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3744  0.0371  0.0019]]\n",
      "mean_state_value 0.06023571831752113\n",
      "episode 528/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9263  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3759  0.0374  0.0019]]\n",
      "mean_state_value 0.06039134332222359\n",
      "episode 529/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0187 -0.0121  0.0881 -0.0023 -0.0027]\n",
      " [-0.0145  0.1419  0.9283  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3774  0.0378  0.0019]]\n",
      "mean_state_value 0.06054687997029967\n",
      "episode 530/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9303  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.379   0.0381  0.002 ]]\n",
      "mean_state_value 0.06070271084668684\n",
      "episode 531/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9322  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3805  0.0384  0.002 ]]\n",
      "mean_state_value 0.06085856174794888\n",
      "episode 532/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9342  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.382   0.0388  0.002 ]]\n",
      "mean_state_value 0.06101432529388615\n",
      "episode 533/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9362  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3836  0.0391  0.0021]]\n",
      "mean_state_value 0.061170368161136655\n",
      "episode 534/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9381  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3851  0.0395  0.0021]]\n",
      "mean_state_value 0.061326500138847495\n",
      "episode 535/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9401  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3867  0.0398  0.0021]]\n",
      "mean_state_value 0.0614826521916591\n",
      "episode 536/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9409  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3882  0.0402  0.0022]]\n",
      "mean_state_value 0.061591912228755506\n",
      "episode 537/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9429  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3897  0.0405  0.0022]]\n",
      "mean_state_value 0.06174822473867048\n",
      "episode 538/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0137 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9448  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3913  0.0409  0.0023]]\n",
      "mean_state_value 0.061904612515213404\n",
      "episode 539/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0167 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9468  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3928  0.0412  0.0023]]\n",
      "mean_state_value 0.06206102041836239\n",
      "episode 540/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9488  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3944  0.0415  0.0023]]\n",
      "mean_state_value 0.062217559106700335\n",
      "episode 541/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 2 2 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9507  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3959  0.0419  0.0024]]\n",
      "mean_state_value 0.06237214182801348\n",
      "episode 542/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9527  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.3974  0.0422  0.0024]]\n",
      "mean_state_value 0.06252861578939484\n",
      "episode 543/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0186 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9547  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.399   0.0426  0.0025]]\n",
      "mean_state_value 0.06268537988854239\n",
      "episode 544/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 2]\n",
      " [1 0 0 0 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9566  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4005  0.0429  0.0025]]\n",
      "mean_state_value 0.0628421954025478\n",
      "episode 545/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9586  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4021  0.0433  0.0025]]\n",
      "mean_state_value 0.0629988961725612\n",
      "episode 546/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9605  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4036  0.0436  0.0026]]\n",
      "mean_state_value 0.06315583095422304\n",
      "episode 547/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9625  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4051  0.044   0.0026]]\n",
      "mean_state_value 0.06331289606518466\n",
      "episode 548/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9645  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4067  0.0443  0.0027]]\n",
      "mean_state_value 0.06346982362189832\n",
      "episode 549/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9664  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4082  0.0447  0.0027]]\n",
      "mean_state_value 0.06362703922763104\n",
      "episode 550/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9684  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4098  0.0451  0.0027]]\n",
      "mean_state_value 0.06378433007799185\n",
      "episode 551/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9704  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4113  0.0454  0.0028]]\n",
      "mean_state_value 0.06394169617114115\n",
      "episode 552/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0082 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9723  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4129  0.0458  0.0028]]\n",
      "mean_state_value 0.06409913789552005\n",
      "episode 553/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9743  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4144  0.0462  0.0029]]\n",
      "mean_state_value 0.06425644324086673\n",
      "episode 554/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9763  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.416   0.0465  0.0029]]\n",
      "mean_state_value 0.06441403505133442\n",
      "episode 555/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9782  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4175  0.0469  0.003 ]]\n",
      "mean_state_value 0.0645716470780871\n",
      "episode 556/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9802  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.419   0.0473  0.003 ]]\n",
      "mean_state_value 0.0647293897496275\n",
      "episode 557/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9822  0.0582 -0.001 ]\n",
      " [-0.0133  0.0048  0.4206  0.0476  0.003 ]]\n",
      "mean_state_value 0.06488715229779507\n",
      "episode 558/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0185 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9841  0.0582 -0.001 ]\n",
      " [-0.0133  0.0048  0.4221  0.048   0.0031]]\n",
      "mean_state_value 0.06504470452837678\n",
      "episode 559/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0138 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9861  0.0582 -0.001 ]\n",
      " [-0.0133  0.0048  0.4237  0.0484  0.0031]]\n",
      "mean_state_value 0.06520233259568413\n",
      "episode 560/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0139 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.988   0.0582 -0.001 ]\n",
      " [-0.0133  0.0048  0.4252  0.0487  0.0032]]\n",
      "mean_state_value 0.06536032088455238\n",
      "episode 561/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0139 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.99    0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4268  0.0491  0.0032]]\n",
      "mean_state_value 0.06551656935372174\n",
      "episode 562/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0139 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.992   0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4283  0.0495  0.0033]]\n",
      "mean_state_value 0.06567442425512858\n",
      "episode 563/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0139 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9939  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4299  0.0499  0.0033]]\n",
      "mean_state_value 0.06583263828594385\n",
      "episode 564/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0139 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9959  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4314  0.0502  0.0034]]\n",
      "mean_state_value 0.06599092759093378\n",
      "episode 565/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0139 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9979  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.433   0.0506  0.0034]]\n",
      "mean_state_value 0.06614899273107985\n",
      "episode 566/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0139 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  0.9998  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4345  0.051   0.0035]]\n",
      "mean_state_value 0.06630715024479558\n",
      "episode 567/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0139 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  1.0018  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4361  0.0514  0.0035]]\n",
      "mean_state_value 0.06646571998971901\n",
      "episode 568/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0081 -0.0041 -0.0038 -0.0028]\n",
      " [-0.0139 -0.0039 -0.0016 -0.0022 -0.0031]\n",
      " [-0.0184 -0.0121  0.0881 -0.0023 -0.0026]\n",
      " [-0.0145  0.1419  1.0038  0.0582 -0.0011]\n",
      " [-0.0133  0.0048  0.4376  0.0518  0.0035]]\n",
      "mean_state_value 0.06662402879457384\n",
      "episode 569/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 0 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6400e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8400e-02 -1.2100e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0057e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.3920e-01  5.2100e-02  3.6000e-03]]\n",
      "mean_state_value 0.06677768048345606\n",
      "episode 570/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6400e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8400e-02 -1.2100e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0077e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.4070e-01  5.2500e-02  3.6000e-03]]\n",
      "mean_state_value 0.0669361416635883\n",
      "episode 571/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6400e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8400e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0097e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.4230e-01  5.2900e-02  3.7000e-03]]\n",
      "mean_state_value 0.06709467864769617\n",
      "episode 572/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 1]\n",
      " [2 0 0 1 1]\n",
      " [3 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8400e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0116e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.4380e-01  5.3300e-02  3.7000e-03]]\n",
      "mean_state_value 0.0672536780567972\n",
      "episode 573/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8400e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0136e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.4540e-01  5.3700e-02  3.8000e-03]]\n",
      "mean_state_value 0.06741270042983973\n",
      "episode 574/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8400e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0155e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.4690e-01  5.4100e-02  3.8000e-03]]\n",
      "mean_state_value 0.06757174297740028\n",
      "episode 575/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0175e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.4850e-01  5.4500e-02  3.9000e-03]]\n",
      "mean_state_value 0.06773058214885076\n",
      "episode 576/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0195e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.5000e-01  5.4900e-02  3.9000e-03]]\n",
      "mean_state_value 0.06788983011842839\n",
      "episode 577/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0214e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.5160e-01  5.5200e-02  4.0000e-03]]\n",
      "mean_state_value 0.06804882053590397\n",
      "episode 578/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0234e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.5320e-01  5.5600e-02  4.0000e-03]]\n",
      "mean_state_value 0.06820801422408294\n",
      "episode 579/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0254e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.5470e-01  5.6000e-02  4.1000e-03]]\n",
      "mean_state_value 0.06836748737003338\n",
      "episode 580/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0273e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.5630e-01  5.6400e-02  4.1000e-03]]\n",
      "mean_state_value 0.06852703570511051\n",
      "episode 581/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0293e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.5780e-01  5.6800e-02  4.2000e-03]]\n",
      "mean_state_value 0.06868665963186976\n",
      "episode 582/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 2 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.1000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.3900e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.6000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0313e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.5940e-01  5.7200e-02  4.3000e-03]]\n",
      "mean_state_value 0.06884278857021442\n",
      "episode 583/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.0000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0332e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.6090e-01  5.7600e-02  4.3000e-03]]\n",
      "mean_state_value 0.06900223203981301\n",
      "episode 584/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.0000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0352e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.6250e-01  5.8000e-02  4.4000e-03]]\n",
      "mean_state_value 0.06916175127875451\n",
      "episode 585/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.0000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0371e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.6410e-01  5.8400e-02  4.4000e-03]]\n",
      "mean_state_value 0.06932134628415951\n",
      "episode 586/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.0000e-03 -4.1000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0391e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.6560e-01  5.8800e-02  4.5000e-03]]\n",
      "mean_state_value 0.06948114375509698\n",
      "episode 587/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.0000e-03 -4.2000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8300e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0411e+00  5.8200e-02 -1.0000e-03]\n",
      " [-1.3300e-02  4.8000e-03  4.6720e-01  5.9200e-02  4.5000e-03]]\n",
      "mean_state_value 0.06964116383343245\n",
      "episode 588/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.630e-02 -8.000e-03 -4.200e-03 -3.800e-03 -2.800e-03]\n",
      " [-1.400e-02 -3.900e-03 -1.600e-03 -2.200e-03 -3.200e-03]\n",
      " [-1.830e-02 -1.220e-02  8.810e-02 -2.300e-03 -2.500e-03]\n",
      " [-1.450e-02  1.419e-01  1.043e+00  5.820e-02 -1.000e-03]\n",
      " [-1.330e-02  4.800e-03  4.687e-01  5.960e-02  4.600e-03]]\n",
      "mean_state_value 0.06980098546546738\n",
      "episode 589/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.630e-02 -8.000e-03 -4.200e-03 -3.800e-03 -2.800e-03]\n",
      " [-1.400e-02 -3.900e-03 -1.600e-03 -2.200e-03 -3.200e-03]\n",
      " [-1.830e-02 -1.220e-02  8.810e-02 -2.300e-03 -2.500e-03]\n",
      " [-1.450e-02  1.419e-01  1.045e+00  5.820e-02 -1.000e-03]\n",
      " [-1.330e-02  4.800e-03  4.703e-01  6.000e-02  4.600e-03]]\n",
      "mean_state_value 0.06996121045365405\n",
      "episode 590/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.630e-02 -8.000e-03 -4.200e-03 -3.800e-03 -2.800e-03]\n",
      " [-1.400e-02 -3.900e-03 -1.600e-03 -2.200e-03 -3.200e-03]\n",
      " [-1.830e-02 -1.220e-02  8.810e-02 -2.300e-03 -2.500e-03]\n",
      " [-1.450e-02  1.419e-01  1.047e+00  5.820e-02 -1.000e-03]\n",
      " [-1.330e-02  4.800e-03  4.718e-01  6.040e-02  4.700e-03]]\n",
      "mean_state_value 0.07012145610503466\n",
      "episode 591/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6300e-02 -8.0000e-03 -4.2000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8200e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0489e+00  5.8200e-02 -9.0000e-04]\n",
      " [-1.3300e-02  4.8000e-03  4.7340e-01  6.0900e-02  4.7000e-03]]\n",
      "mean_state_value 0.07028164631707817\n",
      "episode 592/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6200e-02 -8.0000e-03 -4.2000e-03 -3.8000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8200e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0509e+00  5.8200e-02 -9.0000e-04]\n",
      " [-1.3300e-02  4.8000e-03  4.7500e-01  6.1300e-02  4.8000e-03]]\n",
      "mean_state_value 0.07044209681096236\n",
      "episode 593/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6200e-02 -8.0000e-03 -4.2000e-03 -3.7000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8200e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4190e-01  1.0528e+00  5.8200e-02 -9.0000e-04]\n",
      " [-1.3300e-02  4.8000e-03  4.7650e-01  6.1700e-02  4.9000e-03]]\n",
      "mean_state_value 0.07060262287988546\n",
      "episode 594/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 57\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1 53  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6200e-02 -8.0000e-03 -4.2000e-03 -3.7000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8200e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4380e-01  1.0548e+00  5.8200e-02 -9.0000e-04]\n",
      " [-1.3300e-02  4.8000e-03  4.7650e-01  6.1700e-02  4.9000e-03]]\n",
      "mean_state_value 0.07075602222228565\n",
      "episode 595/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6200e-02 -8.0000e-03 -4.2000e-03 -3.7000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8200e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4380e-01  1.0568e+00  5.8200e-02 -9.0000e-04]\n",
      " [-1.3300e-02  4.8000e-03  4.7810e-01  6.2100e-02  4.9000e-03]]\n",
      "mean_state_value 0.07091669969048846\n",
      "episode 596/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6200e-02 -8.0000e-03 -4.2000e-03 -3.7000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8200e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4380e-01  1.0587e+00  5.8200e-02 -9.0000e-04]\n",
      " [-1.3300e-02  4.8000e-03  4.7960e-01  6.2500e-02  5.0000e-03]]\n",
      "mean_state_value 0.07107745190712828\n",
      "episode 597/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6200e-02 -8.0000e-03 -4.2000e-03 -3.7000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8200e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4380e-01  1.0607e+00  5.8200e-02 -9.0000e-04]\n",
      " [-1.3300e-02  4.8000e-03  4.8120e-01  6.2900e-02  5.0000e-03]]\n",
      "mean_state_value 0.07123827969158407\n",
      "episode 598/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6200e-02 -8.0000e-03 -4.2000e-03 -3.7000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8200e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4380e-01  1.0627e+00  5.8200e-02 -9.0000e-04]\n",
      " [-1.3300e-02  4.8000e-03  4.8280e-01  6.3300e-02  5.1000e-03]]\n",
      "mean_state_value 0.07139898322670163\n",
      "episode 599/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️➡️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.6200e-02 -8.0000e-03 -4.2000e-03 -3.7000e-03 -2.8000e-03]\n",
      " [-1.4000e-02 -3.9000e-03 -1.6000e-03 -2.2000e-03 -3.2000e-03]\n",
      " [-1.8200e-02 -1.2200e-02  8.8100e-02 -2.3000e-03 -2.5000e-03]\n",
      " [-1.4500e-02  1.4380e-01  1.0646e+00  5.8200e-02 -9.0000e-04]\n",
      " [-1.3300e-02  4.8000e-03  4.8430e-01  6.3700e-02  5.1000e-03]]\n",
      "mean_state_value 0.0715597623655619\n"
     ]
    }
   ],
   "source": [
    "action_value = SARSA(gridworld)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4c1f5403-06f9-499a-b0c8-e628b4b78365",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T07:22:29.530116Z",
     "start_time": "2025-04-28T07:22:29.525540Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.75940987e-01, -1.33428025e-02, -1.60646127e-02,\n",
       "        -1.72018799e-01, -1.43798542e-02],\n",
       "       [-9.34188536e-02, -8.71276044e-03, -7.88449097e-01,\n",
       "        -1.01330272e-02, -9.25555985e-03],\n",
       "       [-1.08105106e-01, -6.38234468e-03, -6.86710957e-01,\n",
       "        -8.14787984e-03, -6.93737446e-03],\n",
       "       [-1.93489047e-01, -1.07985718e-02, -4.58246183e-03,\n",
       "        -9.69142804e-03, -4.79661582e-03],\n",
       "       [-1.53431259e-01, -1.83861099e-01, -2.11672102e-03,\n",
       "        -9.13132028e-03, -1.09986155e-02],\n",
       "       [-9.76594606e-03, -1.11466679e+00, -1.01572901e-02,\n",
       "        -7.14231799e-02, -1.64002291e-02],\n",
       "       [-1.96835532e-03, -1.49180590e-01, -3.13522660e-03,\n",
       "        -2.05577237e-03, -1.49177500e-01],\n",
       "       [-2.62906471e-04, -2.17432064e-04, -8.94575081e-02,\n",
       "        -1.48974166e-01, -1.09472530e-01],\n",
       "       [-2.01283525e-03, -2.33529306e-03, -1.89388537e-03,\n",
       "        -5.36382377e-01, -3.13140945e-03],\n",
       "       [-4.01687292e-03, -1.51570935e-01, -4.58113817e-03,\n",
       "        -4.63698268e-03, -3.91621441e-03],\n",
       "       [-1.94799403e-02, -2.27960123e-02, -2.30018383e-02,\n",
       "        -3.46439512e-01, -2.21004287e-02],\n",
       "       [-5.64152533e-01, -5.80816798e-01, -5.14896789e-01,\n",
       "        -1.26623325e-02, -1.35606708e-02],\n",
       "       [-3.99402251e-02, -5.96420051e-05,  9.07999751e-02,\n",
       "        -1.41207524e-04, -2.98687457e-02],\n",
       "       [-1.04643235e-03, -9.39484494e-04, -2.46474192e-01,\n",
       "        -2.26798091e-01, -1.05454420e-03],\n",
       "       [-4.33317545e-03, -1.24986829e-01, -2.30363351e-03,\n",
       "        -2.40109854e-03, -2.56842919e-03],\n",
       "       [-1.41199120e-02, -1.06496180e+00, -1.42001838e-02,\n",
       "        -1.17595722e-01, -1.43586594e-02],\n",
       "       [-6.68123725e-04,  1.51820404e-01, -1.78835085e-01,\n",
       "        -4.38792529e-04, -1.58842278e-01],\n",
       "       [-2.75380944e-01, -2.36744603e-01,  9.04597312e-04,\n",
       "        -2.36882796e-01,  1.06421014e+00],\n",
       "       [-9.88329233e-05, -1.33701580e-07, -8.18643947e-05,\n",
       "         6.36998609e-02, -2.99691390e-02],\n",
       "       [-3.47444192e-04, -2.86868988e-02, -8.70114402e-05,\n",
       "        -2.66028791e-01, -6.64827839e-04],\n",
       "       [-1.66659829e-02, -7.51671032e-01, -7.20206026e-02,\n",
       "        -7.00003641e-02, -1.79176415e-02],\n",
       "       [-1.58315758e-01,  4.57494708e-03, -1.00697981e-02,\n",
       "        -1.12065010e-04, -6.99088015e-02],\n",
       "       [ 4.70059860e-01, -1.16131803e-04, -1.27960103e-02,\n",
       "        -1.48968073e-01,  3.33593609e-04],\n",
       "       [-1.48566573e-01, -3.57212077e-05, -1.89350039e-02,\n",
       "         6.16337851e-02, -9.36999992e-05],\n",
       "       [-4.53061130e-04, -2.86535003e-02, -3.05694301e-02,\n",
       "         4.84839962e-03, -3.98256839e-04]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "action_value"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
